1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_BC_H_ 28 #define SB_BC_H_ 29 30 #include <stdint.h> 31 #include "r600_isa.h" 32 33 #include <cstdio> 34 #include <string> 35 #include <vector> 36 #include <stack> 37 38 struct r600_bytecode; 39 struct r600_shader; 40 41 namespace r600_sb { 42 43 class hw_encoding_format; 44 class node; 45 class alu_node; 46 class cf_node; 47 class fetch_node; 48 class alu_group_node; 49 class region_node; 50 class shader; 51 class value; 52 53 class sb_ostream { 54 public: sb_ostream()55 sb_ostream() {} 56 57 virtual void write(const char *s) = 0; 58 59 sb_ostream& operator <<(const char *s) { 60 write(s); 61 return *this; 62 } 63 64 sb_ostream& operator <<(const std::string& s) { 65 return *this << s.c_str(); 66 } 67 68 sb_ostream& operator <<(void *p) { 69 char b[32]; 70 sprintf(b, "%p", p); 71 return *this << b; 72 } 73 74 sb_ostream& operator <<(char c) { 75 char b[2]; 76 sprintf(b, "%c", c); 77 return *this << b; 78 } 79 80 sb_ostream& operator <<(int n) { 81 char b[32]; 82 sprintf(b, "%d", n); 83 return *this << b; 84 } 85 86 sb_ostream& operator <<(unsigned n) { 87 char b[32]; 88 sprintf(b, "%u", n); 89 return *this << b; 90 } 91 92 sb_ostream& operator <<(double d) { 93 char b[32]; 94 snprintf(b, 32, "%g", d); 95 return *this << b; 96 } 97 98 // print as field of specified width, right aligned print_w(int n,int width)99 void print_w(int n, int width) { 100 char b[256],f[8]; 101 sprintf(f, "%%%dd", width); 102 snprintf(b, 256, f, n); 103 write(b); 104 } 105 106 // print as field of specified width, left aligned print_wl(int n,int width)107 void print_wl(int n, int width) { 108 char b[256],f[8]; 109 sprintf(f, "%%-%dd", width); 110 snprintf(b, 256, f, n); 111 write(b); 112 } 113 114 // print as field of specified width, left aligned print_wl(const std::string & s,int width)115 void print_wl(const std::string &s, int width) { 116 write(s.c_str()); 117 int l = s.length(); 118 while (l++ < width) { 119 write(" "); 120 } 121 } 122 123 // print int as field of specified width, right aligned, zero-padded print_zw(int n,int width)124 void print_zw(int n, int width) { 125 char b[256],f[8]; 126 sprintf(f, "%%0%dd", width); 127 snprintf(b, 256, f, n); 128 write(b); 129 } 130 131 // print int as field of specified width, right aligned, zero-padded, hex print_zw_hex(int n,int width)132 void print_zw_hex(int n, int width) { 133 char b[256],f[8]; 134 sprintf(f, "%%0%dx", width); 135 snprintf(b, 256, f, n); 136 write(b); 137 } 138 }; 139 140 class sb_ostringstream : public sb_ostream { 141 std::string data; 142 public: sb_ostringstream()143 sb_ostringstream() : data() {} 144 write(const char * s)145 virtual void write(const char *s) { 146 data += s; 147 } 148 clear()149 void clear() { data.clear(); } 150 c_str()151 const char* c_str() { return data.c_str(); } str()152 std::string& str() { return data; } 153 }; 154 155 class sb_log : public sb_ostream { 156 FILE *o; 157 public: sb_log()158 sb_log() : o(stderr) {} 159 write(const char * s)160 virtual void write(const char *s) { 161 fputs(s, o); 162 } 163 }; 164 165 extern sb_log sblog; 166 167 enum shader_target 168 { 169 TARGET_UNKNOWN, 170 TARGET_VS, 171 TARGET_ES, 172 TARGET_PS, 173 TARGET_GS, 174 TARGET_GS_COPY, 175 TARGET_COMPUTE, 176 TARGET_FETCH, 177 TARGET_HS, 178 TARGET_LS, 179 180 TARGET_NUM 181 }; 182 183 enum sb_hw_class_bits 184 { 185 HB_R6 = (1<<0), 186 HB_R7 = (1<<1), 187 HB_EG = (1<<2), 188 HB_CM = (1<<3), 189 190 HB_R6R7 = (HB_R6 | HB_R7), 191 HB_EGCM = (HB_EG | HB_CM), 192 HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG), 193 HB_R7EGCM = (HB_R7 | HB_EG | HB_CM), 194 195 HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM) 196 }; 197 198 enum sb_hw_chip 199 { 200 HW_CHIP_UNKNOWN, 201 HW_CHIP_R600, 202 HW_CHIP_RV610, 203 HW_CHIP_RV630, 204 HW_CHIP_RV670, 205 HW_CHIP_RV620, 206 HW_CHIP_RV635, 207 HW_CHIP_RS780, 208 HW_CHIP_RS880, 209 HW_CHIP_RV770, 210 HW_CHIP_RV730, 211 HW_CHIP_RV710, 212 HW_CHIP_RV740, 213 HW_CHIP_CEDAR, 214 HW_CHIP_REDWOOD, 215 HW_CHIP_JUNIPER, 216 HW_CHIP_CYPRESS, 217 HW_CHIP_HEMLOCK, 218 HW_CHIP_PALM, 219 HW_CHIP_SUMO, 220 HW_CHIP_SUMO2, 221 HW_CHIP_BARTS, 222 HW_CHIP_TURKS, 223 HW_CHIP_CAICOS, 224 HW_CHIP_CAYMAN, 225 HW_CHIP_ARUBA 226 }; 227 228 enum sb_hw_class 229 { 230 HW_CLASS_UNKNOWN, 231 HW_CLASS_R600, 232 HW_CLASS_R700, 233 HW_CLASS_EVERGREEN, 234 HW_CLASS_CAYMAN 235 }; 236 237 enum alu_slots { 238 SLOT_X = 0, 239 SLOT_Y = 1, 240 SLOT_Z = 2, 241 SLOT_W = 3, 242 SLOT_TRANS = 4 243 }; 244 245 enum misc_consts { 246 MAX_ALU_LITERALS = 4, 247 MAX_ALU_SLOTS = 128, 248 MAX_GPR = 128, 249 MAX_CHAN = 4 250 251 }; 252 253 enum alu_src_sel { 254 255 ALU_SRC_LDS_OQ_A = 219, 256 ALU_SRC_LDS_OQ_B = 220, 257 ALU_SRC_LDS_OQ_A_POP = 221, 258 ALU_SRC_LDS_OQ_B_POP = 222, 259 ALU_SRC_LDS_DIRECT_A = 223, 260 ALU_SRC_LDS_DIRECT_B = 224, 261 ALU_SRC_TIME_HI = 227, 262 ALU_SRC_TIME_LO = 228, 263 ALU_SRC_MASK_HI = 229, 264 ALU_SRC_MASK_LO = 230, 265 ALU_SRC_HW_WAVE_ID = 231, 266 ALU_SRC_SIMD_ID = 232, 267 ALU_SRC_SE_ID = 233, 268 ALU_SRC_HW_THREADGRP_ID = 234, 269 ALU_SRC_WAVE_ID_IN_GRP = 235, 270 ALU_SRC_NUM_THREADGRP_WAVES = 236, 271 ALU_SRC_HW_ALU_ODD = 237, 272 ALU_SRC_LOOP_IDX = 238, 273 ALU_SRC_PARAM_BASE_ADDR = 240, 274 ALU_SRC_NEW_PRIM_MASK = 241, 275 ALU_SRC_PRIM_MASK_HI = 242, 276 ALU_SRC_PRIM_MASK_LO = 243, 277 ALU_SRC_1_DBL_L = 244, 278 ALU_SRC_1_DBL_M = 245, 279 ALU_SRC_0_5_DBL_L = 246, 280 ALU_SRC_0_5_DBL_M = 247, 281 ALU_SRC_0 = 248, 282 ALU_SRC_1 = 249, 283 ALU_SRC_1_INT = 250, 284 ALU_SRC_M_1_INT = 251, 285 ALU_SRC_0_5 = 252, 286 ALU_SRC_LITERAL = 253, 287 ALU_SRC_PV = 254, 288 ALU_SRC_PS = 255, 289 290 ALU_SRC_PARAM_OFFSET = 448 291 }; 292 293 enum alu_predicate_select 294 { 295 PRED_SEL_OFF = 0, 296 // RESERVED = 1, 297 PRED_SEL_0 = 2, 298 PRED_SEL_1 = 3 299 }; 300 301 302 enum alu_omod { 303 OMOD_OFF = 0, 304 OMOD_M2 = 1, 305 OMOD_M4 = 2, 306 OMOD_D2 = 3 307 }; 308 309 enum alu_index_mode { 310 INDEX_AR_X = 0, 311 INDEX_AR_Y_R600 = 1, 312 INDEX_AR_Z_R600 = 2, 313 INDEX_AR_W_R600 = 3, 314 315 INDEX_LOOP = 4, 316 INDEX_GLOBAL = 5, 317 INDEX_GLOBAL_AR_X = 6 318 }; 319 320 enum alu_cayman_mova_dst { 321 CM_MOVADST_AR_X, 322 CM_MOVADST_PC, 323 CM_MOVADST_IDX0, 324 CM_MOVADST_IDX1, 325 CM_MOVADST_CG0, // clause-global byte 0 326 CM_MOVADST_CG1, 327 CM_MOVADST_CG2, 328 CM_MOVADST_CG3 329 }; 330 331 enum alu_cayman_exec_mask_op { 332 CM_EMO_DEACTIVATE, 333 CM_EMO_BREAK, 334 CM_EMO_CONTINUE, 335 CM_EMO_KILL 336 }; 337 338 339 enum cf_exp_type { 340 EXP_PIXEL, 341 EXP_POS, 342 EXP_PARAM, 343 344 EXP_TYPE_COUNT 345 }; 346 347 enum cf_mem_type { 348 MEM_WRITE, 349 MEM_WRITE_IND, 350 MEM_WRITE_ACK, 351 MEM_WRITE_IND_ACK 352 }; 353 354 355 enum alu_kcache_mode { 356 KC_LOCK_NONE, 357 KC_LOCK_1, 358 KC_LOCK_2, 359 KC_LOCK_LOOP 360 }; 361 362 enum alu_kcache_index_mode { 363 KC_INDEX_NONE, 364 KC_INDEX_0, 365 KC_INDEX_1, 366 KC_INDEX_INVALID 367 }; 368 369 enum chan_select { 370 SEL_X = 0, 371 SEL_Y = 1, 372 SEL_Z = 2, 373 SEL_W = 3, 374 SEL_0 = 4, 375 SEL_1 = 5, 376 // RESERVED = 6, 377 SEL_MASK = 7 378 }; 379 380 enum bank_swizzle { 381 VEC_012 = 0, 382 VEC_021 = 1, 383 VEC_120 = 2, 384 VEC_102 = 3, 385 VEC_201 = 4, 386 VEC_210 = 5, 387 388 VEC_NUM = 6, 389 390 SCL_210 = 0, 391 SCL_122 = 1, 392 SCL_212 = 2, 393 SCL_221 = 3, 394 395 SCL_NUM = 4 396 397 }; 398 399 enum sched_queue_id { 400 SQ_CF, 401 SQ_ALU, 402 SQ_TEX, 403 SQ_VTX, 404 SQ_GDS, 405 406 SQ_NUM 407 }; 408 409 struct literal { 410 union { 411 int32_t i; 412 uint32_t u; 413 float f; 414 }; 415 iliteral416 literal(int32_t i = 0) : i(i) {} literalliteral417 literal(uint32_t u) : u(u) {} literalliteral418 literal(float f) : f(f) {} literalliteral419 literal(double f) : f(f) {} uint32_tliteral420 operator uint32_t() const { return u; } 421 bool operator ==(literal l) { return u == l.u; } 422 bool operator ==(int v_int) { return i == v_int; } 423 bool operator ==(unsigned v_uns) { return u == v_uns; } 424 }; 425 426 struct bc_kcache { 427 unsigned mode; 428 unsigned bank; 429 unsigned addr; 430 unsigned index_mode; 431 } ; 432 433 // TODO optimize bc structures 434 435 struct bc_cf { 436 437 bc_kcache kc[4]; 438 439 unsigned id; 440 441 442 const cf_op_info * op_ptr; 443 unsigned op; 444 445 unsigned addr:32; 446 447 unsigned alt_const:1; 448 unsigned uses_waterfall:1; 449 450 unsigned barrier:1; 451 unsigned count:7; 452 unsigned pop_count:3; 453 unsigned call_count:6; 454 unsigned whole_quad_mode:1; 455 unsigned valid_pixel_mode:1; 456 457 unsigned jumptable_sel:3; 458 unsigned cf_const:5; 459 unsigned cond:2; 460 unsigned end_of_program:1; 461 462 unsigned array_base:13; 463 unsigned elem_size:2; 464 unsigned index_gpr:7; 465 unsigned rw_gpr:7; 466 unsigned rw_rel:1; 467 unsigned type:2; 468 469 unsigned burst_count:4; 470 unsigned mark:1; 471 unsigned sel[4]; 472 473 unsigned array_size:12; 474 unsigned comp_mask:4; 475 476 unsigned rat_id:4; 477 unsigned rat_inst:6; 478 unsigned rat_index_mode:2; 479 set_opbc_cf480 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } 481 is_alu_extendedbc_cf482 bool is_alu_extended() { 483 assert(op_ptr->flags & CF_ALU); 484 return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE || 485 kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE || 486 kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE; 487 } 488 489 }; 490 491 struct bc_alu_src { 492 unsigned sel:9; 493 unsigned chan:2; 494 unsigned neg:1; 495 unsigned abs:1; 496 unsigned rel:1; 497 literal value; 498 }; 499 500 struct bc_alu { 501 const alu_op_info * op_ptr; 502 unsigned op; 503 504 bc_alu_src src[3]; 505 506 unsigned dst_gpr:7; 507 unsigned dst_chan:2; 508 unsigned dst_rel:1; 509 unsigned clamp:1; 510 unsigned omod:2; 511 unsigned bank_swizzle:3; 512 513 unsigned index_mode:3; 514 unsigned last:1; 515 unsigned pred_sel:2; 516 517 unsigned fog_merge:1; 518 unsigned write_mask:1; 519 unsigned update_exec_mask:1; 520 unsigned update_pred:1; 521 522 unsigned slot:3; 523 524 unsigned lds_idx_offset:6; 525 526 alu_op_flags slot_flags; 527 set_opbc_alu528 void set_op(unsigned op) { 529 this->op = op; 530 op_ptr = r600_isa_alu(op); 531 } 532 }; 533 534 struct bc_fetch { 535 const fetch_op_info * op_ptr; 536 unsigned op; 537 538 unsigned bc_frac_mode:1; 539 unsigned fetch_whole_quad:1; 540 unsigned resource_id:8; 541 542 unsigned src_gpr:7; 543 unsigned src_rel:1; 544 unsigned src_rel_global:1; /* for GDS ops */ 545 unsigned src_sel[4]; 546 547 unsigned dst_gpr:7; 548 unsigned dst_rel:1; 549 unsigned dst_rel_global:1; /* for GDS ops */ 550 unsigned dst_sel[4]; 551 552 unsigned alt_const:1; 553 554 unsigned inst_mod:2; 555 unsigned resource_index_mode:2; 556 unsigned sampler_index_mode:2; 557 558 unsigned coord_type[4]; 559 unsigned lod_bias:7; 560 561 unsigned offset[3]; 562 563 unsigned sampler_id:5; 564 565 566 unsigned fetch_type:2; 567 unsigned mega_fetch_count:6; 568 unsigned coalesced_read:1; 569 unsigned structured_read:2; 570 unsigned lds_req:1; 571 572 unsigned data_format:6; 573 unsigned format_comp_all:1; 574 unsigned num_format_all:2; 575 unsigned semantic_id:8; 576 unsigned srf_mode_all:1; 577 unsigned use_const_fields:1; 578 579 unsigned const_buf_no_stride:1; 580 unsigned endian_swap:2; 581 unsigned mega_fetch:1; 582 583 unsigned src2_gpr:7; /* for GDS */ 584 unsigned alloc_consume:1; 585 unsigned uav_id:4; 586 unsigned uav_index_mode:2; 587 unsigned bcast_first_req:1; 588 set_opbc_fetch589 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); } 590 }; 591 592 struct shader_stats { 593 unsigned ndw; 594 unsigned ngpr; 595 unsigned nstack; 596 597 unsigned cf; // clause instructions not included 598 unsigned alu; 599 unsigned alu_clauses; 600 unsigned fetch_clauses; 601 unsigned fetch; 602 unsigned alu_groups; 603 604 unsigned shaders; // number of shaders (for accumulated stats) 605 shader_statsshader_stats606 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), 607 fetch_clauses(), fetch(), alu_groups(), shaders() {} 608 609 void collect(node *n); 610 void accumulate(shader_stats &s); 611 void dump(); 612 void dump_diff(shader_stats &s); 613 }; 614 615 class sb_context { 616 617 public: 618 619 shader_stats src_stats, opt_stats; 620 621 r600_isa *isa; 622 623 sb_hw_chip hw_chip; 624 sb_hw_class hw_class; 625 626 unsigned alu_temp_gprs; 627 unsigned max_fetch; 628 bool has_trans; 629 unsigned vtx_src_num; 630 unsigned num_slots; 631 bool uses_mova_gpr; 632 633 bool r6xx_gpr_index_workaround; 634 635 bool stack_workaround_8xx; 636 bool stack_workaround_9xx; 637 638 unsigned wavefront_size; 639 unsigned stack_entry_size; 640 641 static unsigned dump_pass; 642 static unsigned dump_stat; 643 644 static unsigned dry_run; 645 static unsigned no_fallback; 646 static unsigned safe_math; 647 648 static unsigned dskip_start; 649 static unsigned dskip_end; 650 static unsigned dskip_mode; 651 sb_context()652 sb_context() : src_stats(), opt_stats(), isa(0), 653 hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {} 654 655 int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); 656 is_r600()657 bool is_r600() {return hw_class == HW_CLASS_R600;} is_r700()658 bool is_r700() {return hw_class == HW_CLASS_R700;} is_evergreen()659 bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;} is_cayman()660 bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;} is_egcm()661 bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;} 662 needs_8xx_stack_workaround()663 bool needs_8xx_stack_workaround() { 664 if (!is_evergreen()) 665 return false; 666 667 switch (hw_chip) { 668 case HW_CHIP_HEMLOCK: 669 case HW_CHIP_CYPRESS: 670 case HW_CHIP_JUNIPER: 671 return false; 672 default: 673 return true; 674 } 675 } 676 needs_9xx_stack_workaround()677 bool needs_9xx_stack_workaround() { 678 return is_cayman(); 679 } 680 hw_class_bit()681 sb_hw_class_bits hw_class_bit() { 682 switch (hw_class) { 683 case HW_CLASS_R600:return HB_R6; 684 case HW_CLASS_R700:return HB_R7; 685 case HW_CLASS_EVERGREEN:return HB_EG; 686 case HW_CLASS_CAYMAN:return HB_CM; 687 default: assert(!"unknown hw class"); return (sb_hw_class_bits)0; 688 689 } 690 } 691 cf_opcode(unsigned op)692 unsigned cf_opcode(unsigned op) { 693 return r600_isa_cf_opcode(isa->hw_class, op); 694 } 695 alu_opcode(unsigned op)696 unsigned alu_opcode(unsigned op) { 697 return r600_isa_alu_opcode(isa->hw_class, op); 698 } 699 alu_slots(unsigned op)700 unsigned alu_slots(unsigned op) { 701 return r600_isa_alu_slots(isa->hw_class, op); 702 } 703 alu_slots(const alu_op_info * op_ptr)704 unsigned alu_slots(const alu_op_info * op_ptr) { 705 return op_ptr->slots[isa->hw_class]; 706 } 707 alu_slots_mask(const alu_op_info * op_ptr)708 unsigned alu_slots_mask(const alu_op_info * op_ptr) { 709 unsigned mask = 0; 710 unsigned slot_flags = alu_slots(op_ptr); 711 if (slot_flags & AF_V) 712 mask = 0x0F; 713 if (!is_cayman() && (slot_flags & AF_S)) 714 mask |= 0x10; 715 /* Force LDS_IDX ops into SLOT_X */ 716 if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11)) 717 mask = 0x01; 718 return mask; 719 } 720 fetch_opcode(unsigned op)721 unsigned fetch_opcode(unsigned op) { 722 return r600_isa_fetch_opcode(isa->hw_class, op); 723 } 724 is_kcache_sel(unsigned sel)725 bool is_kcache_sel(unsigned sel) { 726 return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320)); 727 } 728 is_lds_oq(unsigned sel)729 bool is_lds_oq(unsigned sel) { 730 return (sel >= 0xdb && sel <= 0xde); 731 } 732 733 const char * get_hw_class_name(); 734 const char * get_hw_chip_name(); 735 736 }; 737 738 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0) 739 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0) 740 741 class bc_decoder { 742 743 sb_context &ctx; 744 745 uint32_t* dw; 746 unsigned ndw; 747 748 public: 749 bc_decoder(sb_context & sctx,uint32_t * data,unsigned size)750 bc_decoder(sb_context &sctx, uint32_t *data, unsigned size) 751 : ctx(sctx), dw(data), ndw(size) {} 752 753 int decode_cf(unsigned &i, bc_cf &bc); 754 int decode_alu(unsigned &i, bc_alu &bc); 755 int decode_fetch(unsigned &i, bc_fetch &bc); 756 757 private: 758 int decode_cf_alu(unsigned &i, bc_cf &bc); 759 int decode_cf_exp(unsigned &i, bc_cf &bc); 760 int decode_cf_mem(unsigned &i, bc_cf &bc); 761 762 int decode_fetch_vtx(unsigned &i, bc_fetch &bc); 763 int decode_fetch_gds(unsigned &i, bc_fetch &bc); 764 }; 765 766 // bytecode format definition 767 768 class hw_encoding_format { 769 const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing 770 hw_encoding_format(); 771 protected: 772 uint32_t value; 773 public: hw_encoding_format(sb_hw_class_bits hw)774 hw_encoding_format(sb_hw_class_bits hw) 775 : hw_target(hw), value(0) {} hw_encoding_format(uint32_t v,sb_hw_class_bits hw)776 hw_encoding_format(uint32_t v, sb_hw_class_bits hw) 777 : hw_target(hw), value(v) {} get_value(sb_hw_class_bits hw)778 uint32_t get_value(sb_hw_class_bits hw) const { 779 assert((hw & hw_target) == hw); 780 return value; 781 } 782 }; 783 784 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \ 785 class fmt##_##hwset : public hw_encoding_format {\ 786 typedef fmt##_##hwset thistype; \ 787 public: \ 788 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \ 789 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {}; 790 791 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL) 792 793 #define BC_FORMAT_END(fmt) }; 794 795 // bytecode format field definition 796 797 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \ 798 thistype & name(unsigned v) { \ 799 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \ 800 return *this; \ 801 } \ 802 unsigned get_##name() const { \ 803 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \ 804 } 805 806 #define BC_RSRVD(fmt, last_bit, first_bit) 807 808 // CLAMP macro defined elsewhere interferes with bytecode field name 809 #undef CLAMP 810 #include "sb_bc_fmt_def.inc" 811 812 #undef BC_FORMAT_BEGIN 813 #undef BC_FORMAT_END 814 #undef BC_FIELD 815 #undef BC_RSRVD 816 817 class bc_parser { 818 sb_context & ctx; 819 820 bc_decoder *dec; 821 822 r600_bytecode *bc; 823 r600_shader *pshader; 824 825 uint32_t *dw; 826 unsigned bc_ndw; 827 828 unsigned max_cf; 829 830 shader *sh; 831 832 int error; 833 834 alu_node *slots[2][5]; 835 unsigned cgroup; 836 837 typedef std::vector<cf_node*> id_cf_map; 838 id_cf_map cf_map; 839 840 typedef std::stack<region_node*> region_stack; 841 region_stack loop_stack; 842 843 bool gpr_reladdr; 844 845 // Note: currently relies on input emitting SET_CF in same basic block as uses 846 value *cf_index_value[2]; 847 alu_node *mova; 848 public: 849 bc_parser(sb_context & sctx,r600_bytecode * bc,r600_shader * pshader)850 bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : 851 ctx(sctx), dec(), bc(bc), pshader(pshader), 852 dw(), bc_ndw(), max_cf(), 853 sh(), error(), slots(), cgroup(), 854 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { } 855 856 int decode(); 857 int prepare(); 858 get_shader()859 shader* get_shader() { assert(!error); return sh; } 860 861 private: 862 863 int decode_shader(); 864 865 int parse_decls(); 866 867 int decode_cf(unsigned &i, bool &eop); 868 869 int decode_alu_clause(cf_node *cf); 870 int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); 871 872 int decode_fetch_clause(cf_node *cf); 873 874 int prepare_ir(); 875 int prepare_alu_clause(cf_node *cf); 876 int prepare_alu_group(cf_node* cf, alu_group_node *g); 877 int prepare_fetch_clause(cf_node *cf); 878 879 int prepare_loop(cf_node *c); 880 int prepare_if(cf_node *c); 881 882 void save_set_cf_index(value *val, unsigned idx); 883 value *get_cf_index_value(unsigned idx); 884 void save_mova(alu_node *mova); 885 alu_node *get_mova(); 886 }; 887 888 889 890 891 class bytecode { 892 typedef std::vector<uint32_t> bc_vector; 893 sb_hw_class_bits hw_class_bit; 894 895 bc_vector bc; 896 897 unsigned pos; 898 899 public: 900 901 bytecode(sb_hw_class_bits hw, unsigned rdw = 256) hw_class_bit(hw)902 : hw_class_bit(hw), pos(0) { bc.reserve(rdw); } 903 ndw()904 unsigned ndw() { return bc.size(); } 905 write_data(uint32_t * dst)906 void write_data(uint32_t* dst) { 907 std::copy(bc.begin(), bc.end(), dst); 908 } 909 align(unsigned a)910 void align(unsigned a) { 911 unsigned size = bc.size(); 912 size = (size + a - 1) & ~(a-1); 913 bc.resize(size); 914 } 915 set_size(unsigned sz)916 void set_size(unsigned sz) { 917 assert(sz >= bc.size()); 918 bc.resize(sz); 919 } 920 seek(unsigned p)921 void seek(unsigned p) { 922 if (p != pos) { 923 if (p > bc.size()) { 924 bc.resize(p); 925 } 926 pos = p; 927 } 928 } 929 get_pos()930 unsigned get_pos() { return pos; } data()931 uint32_t *data() { return &bc[0]; } 932 933 bytecode & operator <<(uint32_t v) { 934 if (pos == ndw()) { 935 bc.push_back(v); 936 } else 937 bc.at(pos) = v; 938 ++pos; 939 return *this; 940 } 941 942 bytecode & operator <<(const hw_encoding_format &e) { 943 *this << e.get_value(hw_class_bit); 944 return *this; 945 } 946 947 bytecode & operator <<(const bytecode &b) { 948 bc.insert(bc.end(), b.bc.begin(), b.bc.end()); 949 return *this; 950 } 951 at(unsigned dw_id)952 uint32_t at(unsigned dw_id) { return bc.at(dw_id); } 953 }; 954 955 956 class bc_builder { 957 shader &sh; 958 sb_context &ctx; 959 bytecode bb; 960 int error; 961 962 public: 963 964 bc_builder(shader &s); 965 int build(); get_bytecode()966 bytecode& get_bytecode() { assert(!error); return bb; } 967 968 private: 969 970 int build_cf(cf_node *n); 971 972 int build_cf_alu(cf_node *n); 973 int build_cf_mem(cf_node *n); 974 int build_cf_exp(cf_node *n); 975 976 int build_alu_clause(cf_node *n); 977 int build_alu_group(alu_group_node *n); 978 int build_alu(alu_node *n); 979 980 int build_fetch_clause(cf_node *n); 981 int build_fetch_tex(fetch_node *n); 982 int build_fetch_vtx(fetch_node *n); 983 int build_fetch_gds(fetch_node *n); 984 }; 985 986 } // namespace r600_sb 987 988 #endif /* SB_BC_H_ */ 989