• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * on the rights to use, copy, modify, merge, publish, distribute, sub
8   * license, and/or sell copies of the Software, and to permit persons to whom
9   * the Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice (including the next
12   * paragraph) shall be included in all copies or substantial portions of the
13   * Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21   * USE OR OTHER DEALINGS IN THE SOFTWARE.
22   *
23   * Authors:
24   *      Vadim Girlin
25   */
26  
27  #ifndef SB_BC_H_
28  #define SB_BC_H_
29  
30  #include <stdint.h>
31  #include "r600_isa.h"
32  
33  #include <cstdio>
34  #include <string>
35  #include <vector>
36  #include <stack>
37  
38  struct r600_bytecode;
39  struct r600_shader;
40  
41  namespace r600_sb {
42  
43  class hw_encoding_format;
44  class node;
45  class alu_node;
46  class cf_node;
47  class fetch_node;
48  class alu_group_node;
49  class region_node;
50  class shader;
51  class value;
52  
// Abstract text sink used for dumps and logging.
//
// Derived classes implement write(); every helper below renders its argument
// into a bounded stack buffer and forwards the resulting C string to write().
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base class: make destruction through a base pointer
	// well defined.
	virtual ~sb_ostream() {}

	// Emits the NUL-terminated string s.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	// Prints the pointer value using the "%p" conversion.
	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	// Prints a single character ('\0' produces an empty write).
	sb_ostream& operator <<(char c) {
		char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers pass the field width through the '*' specifier
	// instead of building a format string at run time, so no intermediate
	// buffer can overflow. Output longer than 255 characters is truncated.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		// pad with spaces up to the requested width
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// "%x" expects an unsigned argument
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139  
140  class sb_ostringstream : public sb_ostream {
141  	std::string data;
142  public:
sb_ostringstream()143  	sb_ostringstream() : data() {}
144  
write(const char * s)145  	virtual void write(const char *s) {
146  		data += s;
147  	}
148  
clear()149  	void clear() { data.clear(); }
150  
c_str()151  	const char* c_str() { return data.c_str(); }
str()152  	std::string& str() { return data; }
153  };
154  
155  class sb_log : public sb_ostream {
156  	FILE *o;
157  public:
sb_log()158  	sb_log() : o(stderr) {}
159  
write(const char * s)160  	virtual void write(const char *s) {
161  		fputs(s, o);
162  	}
163  };
164  
165  extern sb_log sblog;
166  
// Shader target identifiers. TARGET_NUM is not a target itself; it equals
// the number of enumerators preceding it.
enum shader_target
{
	TARGET_UNKNOWN = 0,
	TARGET_VS      = 1,
	TARGET_ES      = 2,
	TARGET_PS      = 3,
	TARGET_GS      = 4,
	TARGET_GS_COPY = 5,
	TARGET_COMPUTE = 6,
	TARGET_FETCH   = 7,
	TARGET_HS      = 8,
	TARGET_LS      = 9,

	TARGET_NUM     = 10
};
182  
// One bit per hardware class, plus the combinations used as sets of
// supported classes (see hw_encoding_format::get_value).
enum sb_hw_class_bits
{
	// single-class bits
	HB_R6 = 0x1,
	HB_R7 = 0x2,
	HB_EG = 0x4,
	HB_CM = 0x8,

	// class sets
	HB_R6R7   = HB_R6 | HB_R7,
	HB_EGCM   = HB_EG | HB_CM,
	HB_R6R7EG = HB_R6 | HB_R7 | HB_EG,
	HB_R7EGCM = HB_R7 | HB_EG | HB_CM,

	HB_ALL    = HB_R6 | HB_R7 | HB_EG | HB_CM
};
197  
// Chip identifiers, numbered consecutively from HW_CHIP_UNKNOWN (0).
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN = 0,
	HW_CHIP_R600    = 1,
	HW_CHIP_RV610   = 2,
	HW_CHIP_RV630   = 3,
	HW_CHIP_RV670   = 4,
	HW_CHIP_RV620   = 5,
	HW_CHIP_RV635   = 6,
	HW_CHIP_RS780   = 7,
	HW_CHIP_RS880   = 8,
	HW_CHIP_RV770   = 9,
	HW_CHIP_RV730   = 10,
	HW_CHIP_RV710   = 11,
	HW_CHIP_RV740   = 12,
	HW_CHIP_CEDAR   = 13,
	HW_CHIP_REDWOOD = 14,
	HW_CHIP_JUNIPER = 15,
	HW_CHIP_CYPRESS = 16,
	HW_CHIP_HEMLOCK = 17,
	HW_CHIP_PALM    = 18,
	HW_CHIP_SUMO    = 19,
	HW_CHIP_SUMO2   = 20,
	HW_CHIP_BARTS   = 21,
	HW_CHIP_TURKS   = 22,
	HW_CHIP_CAICOS  = 23,
	HW_CHIP_CAYMAN  = 24,
	HW_CHIP_ARUBA   = 25
};
227  
// Hardware classes. The numeric order is significant: code in this file
// compares classes with >= (sb_context::is_egcm).
enum sb_hw_class
{
	HW_CLASS_UNKNOWN   = 0,
	HW_CLASS_R600      = 1,
	HW_CLASS_R700      = 2,
	HW_CLASS_EVERGREEN = 3,
	HW_CLASS_CAYMAN    = 4
};
236  
// ALU slot indices: four vector slots (X..W) followed by the trans slot.
enum alu_slots
{
	SLOT_X     = 0,
	SLOT_Y     = 1,
	SLOT_Z     = 2,
	SLOT_W     = 3,
	SLOT_TRANS = 4
};
244  
// Miscellaneous size limits used by the bytecode code.
enum misc_consts
{
	MAX_ALU_LITERALS = 4,
	MAX_ALU_SLOTS    = 128,
	MAX_GPR          = 128,
	MAX_CHAN         = 4
};
252  
// Special values for the ALU source selector. The values are fixed
// encodings and are not contiguous (e.g. 225, 226 and 239 are unused here).
enum alu_src_sel
{
	ALU_SRC_LDS_OQ_A            = 219,
	ALU_SRC_LDS_OQ_B            = 220,
	ALU_SRC_LDS_OQ_A_POP        = 221,
	ALU_SRC_LDS_OQ_B_POP        = 222,
	ALU_SRC_LDS_DIRECT_A        = 223,
	ALU_SRC_LDS_DIRECT_B        = 224,
	ALU_SRC_TIME_HI             = 227,
	ALU_SRC_TIME_LO             = 228,
	ALU_SRC_MASK_HI             = 229,
	ALU_SRC_MASK_LO             = 230,
	ALU_SRC_HW_WAVE_ID          = 231,
	ALU_SRC_SIMD_ID             = 232,
	ALU_SRC_SE_ID               = 233,
	ALU_SRC_HW_THREADGRP_ID     = 234,
	ALU_SRC_WAVE_ID_IN_GRP      = 235,
	ALU_SRC_NUM_THREADGRP_WAVES = 236,
	ALU_SRC_HW_ALU_ODD          = 237,
	ALU_SRC_LOOP_IDX            = 238,
	ALU_SRC_PARAM_BASE_ADDR     = 240,
	ALU_SRC_NEW_PRIM_MASK       = 241,
	ALU_SRC_PRIM_MASK_HI        = 242,
	ALU_SRC_PRIM_MASK_LO        = 243,
	ALU_SRC_1_DBL_L             = 244,
	ALU_SRC_1_DBL_M             = 245,
	ALU_SRC_0_5_DBL_L           = 246,
	ALU_SRC_0_5_DBL_M           = 247,
	ALU_SRC_0                   = 248,
	ALU_SRC_1                   = 249,
	ALU_SRC_1_INT               = 250,
	ALU_SRC_M_1_INT             = 251,
	ALU_SRC_0_5                 = 252,
	ALU_SRC_LITERAL             = 253,
	ALU_SRC_PV                  = 254,
	ALU_SRC_PS                  = 255,

	ALU_SRC_PARAM_OFFSET        = 448
};
292  
// Values of the ALU predicate select field. Value 1 is reserved and has no
// enumerator.
enum alu_predicate_select
{
	PRED_SEL_OFF = 0,
	// 1 is RESERVED
	PRED_SEL_0   = 2,
	PRED_SEL_1   = 3
};
300  
301  
// Values of the ALU output modifier field.
enum alu_omod
{
	OMOD_OFF = 0,
	OMOD_M2  = 1,
	OMOD_M4  = 2,
	OMOD_D2  = 3
};
308  
// Values of the ALU index mode field. The *_R600 enumerators are named as
// R600-specific.
enum alu_index_mode
{
	INDEX_AR_X        = 0,
	INDEX_AR_Y_R600   = 1,
	INDEX_AR_Z_R600   = 2,
	INDEX_AR_W_R600   = 3,

	INDEX_LOOP        = 4,
	INDEX_GLOBAL      = 5,
	INDEX_GLOBAL_AR_X = 6
};
319  
// Destination selectors for MOVA on Cayman, numbered consecutively from 0.
enum alu_cayman_mova_dst
{
	CM_MOVADST_AR_X = 0,
	CM_MOVADST_PC   = 1,
	CM_MOVADST_IDX0 = 2,
	CM_MOVADST_IDX1 = 3,
	CM_MOVADST_CG0  = 4,	// clause-global byte 0
	CM_MOVADST_CG1  = 5,
	CM_MOVADST_CG2  = 6,
	CM_MOVADST_CG3  = 7
};
330  
// Execute-mask operations on Cayman, numbered consecutively from 0.
enum alu_cayman_exec_mask_op
{
	CM_EMO_DEACTIVATE = 0,
	CM_EMO_BREAK      = 1,
	CM_EMO_CONTINUE   = 2,
	CM_EMO_KILL       = 3
};
337  
338  
// Export types. EXP_TYPE_COUNT equals the number of enumerators before it.
enum cf_exp_type
{
	EXP_PIXEL = 0,
	EXP_POS   = 1,
	EXP_PARAM = 2,

	EXP_TYPE_COUNT = 3
};
346  
// Memory write types, numbered consecutively from 0.
enum cf_mem_type
{
	MEM_WRITE         = 0,
	MEM_WRITE_IND     = 1,
	MEM_WRITE_ACK     = 2,
	MEM_WRITE_IND_ACK = 3
};
353  
354  
// Values for bc_kcache::mode. KC_LOCK_NONE (0) is what
// bc_cf::is_alu_extended() compares against.
enum alu_kcache_mode
{
	KC_LOCK_NONE = 0,
	KC_LOCK_1    = 1,
	KC_LOCK_2    = 2,
	KC_LOCK_LOOP = 3
};
361  
// Values for bc_kcache::index_mode. KC_INDEX_NONE (0) is what
// bc_cf::is_alu_extended() compares against.
enum alu_kcache_index_mode
{
	KC_INDEX_NONE    = 0,
	KC_INDEX_0       = 1,
	KC_INDEX_1       = 2,
	KC_INDEX_INVALID = 3
};
368  
// Channel select values. Value 6 is reserved and has no enumerator.
enum chan_select
{
	SEL_X    = 0,
	SEL_Y    = 1,
	SEL_Z    = 2,
	SEL_W    = 3,
	SEL_0    = 4,
	SEL_1    = 5,
	// 6 is RESERVED
	SEL_MASK = 7
};
379  
// Bank swizzle values. The VEC_* and SCL_* groups are two separate
// numberings that share this enum, so their values overlap
// (e.g. VEC_012 == SCL_210 == 0). VEC_NUM and SCL_NUM are the group sizes.
enum bank_swizzle
{
	// VEC_* group
	VEC_012 = 0,
	VEC_021 = 1,
	VEC_120 = 2,
	VEC_102 = 3,
	VEC_201 = 4,
	VEC_210 = 5,
	VEC_NUM = 6,

	// SCL_* group
	SCL_210 = 0,
	SCL_122 = 1,
	SCL_212 = 2,
	SCL_221 = 3,
	SCL_NUM = 4
};
398  
// Scheduler queue identifiers. SQ_NUM equals the number of queues.
enum sched_queue_id
{
	SQ_CF  = 0,
	SQ_ALU = 1,
	SQ_TEX = 2,
	SQ_VTX = 3,

	SQ_NUM = 4
};
407  
// A 32-bit literal value that can be viewed as a signed integer, an
// unsigned integer or a float; all three views share the same storage.
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	// Default-constructs to integer zero.
	literal(int32_t i = 0) : i(i) {}
	literal(uint32_t u) : u(u) {}
	literal(float f) : f(f) {}
	// The value is narrowed to single precision.
	literal(double f) : f(static_cast<float>(f)) {}

	// Raw 32-bit contents.
	operator uint32_t() const { return u; }

	// Comparisons do not modify the object, so they are const-qualified and
	// usable on const literals. Literal-to-literal equality compares the
	// raw unsigned view.
	bool operator ==(literal l) const { return u == l.u; }
	bool operator ==(int v_int) const { return i == v_int; }
	bool operator ==(unsigned v_uns) const { return u == v_uns; }
};
424  
// One constant-cache descriptor; bc_cf holds an array of four of these.
struct bc_kcache {
	unsigned mode;		// compared against KC_LOCK_NONE by bc_cf::is_alu_extended()
	unsigned bank;
	unsigned addr;
	unsigned index_mode;	// compared against KC_INDEX_NONE by bc_cf::is_alu_extended()
};
431  
432  // TODO optimize bc structures
433  
434  struct bc_cf {
435  
436  	bc_kcache kc[4];
437  
438  	unsigned id;
439  
440  
441  	const cf_op_info * op_ptr;
442  	unsigned op;
443  
444  	unsigned addr:32;
445  
446  	unsigned alt_const:1;
447  	unsigned uses_waterfall:1;
448  
449  	unsigned barrier:1;
450  	unsigned count:7;
451  	unsigned pop_count:3;
452  	unsigned call_count:6;
453  	unsigned whole_quad_mode:1;
454  	unsigned valid_pixel_mode:1;
455  
456  	unsigned jumptable_sel:3;
457  	unsigned cf_const:5;
458  	unsigned cond:2;
459  	unsigned end_of_program:1;
460  
461  	unsigned array_base:13;
462  	unsigned elem_size:2;
463  	unsigned index_gpr:7;
464  	unsigned rw_gpr:7;
465  	unsigned rw_rel:1;
466  	unsigned type:2;
467  
468  	unsigned burst_count:4;
469  	unsigned mark:1;
470  	unsigned sel[4];
471  
472  	unsigned array_size:12;
473  	unsigned comp_mask:4;
474  
475  	unsigned rat_id:4;
476  	unsigned rat_inst:6;
477  	unsigned rat_index_mode:2;
478  
set_opbc_cf479  	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
480  
is_alu_extendedbc_cf481  	bool is_alu_extended() {
482  		assert(op_ptr->flags & CF_ALU);
483  		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
484  			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
485  			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
486  	}
487  
488  };
489  
// One source operand of an ALU instruction (bc_alu holds three of these).
struct bc_alu_src {
	unsigned sel:9;		// 9 bits wide; presumably holds alu_src_sel values (max 448) -- confirm
	unsigned chan:2;
	unsigned neg:1;
	unsigned abs:1;
	unsigned rel:1;
	literal value;		// 32-bit literal payload attached to the operand
};
498  
// Decoded representation of an ALU instruction: opcode, three source
// operands, and destination/modifier bit-fields.
struct bc_alu {
	const alu_op_info * op_ptr;	// descriptor for op, set by set_op()
	unsigned op;

	bc_alu_src src[3];

	unsigned dst_gpr:7;
	unsigned dst_chan:2;
	unsigned dst_rel:1;
	unsigned clamp:1;
	unsigned omod:2;
	unsigned bank_swizzle:3;

	unsigned index_mode:3;
	unsigned last:1;
	unsigned pred_sel:2;

	unsigned fog_merge:1;
	unsigned write_mask:1;
	unsigned update_exec_mask:1;
	unsigned update_pred:1;

	unsigned slot:3;

	unsigned lds_idx_offset:6;

	alu_op_flags slot_flags;

	// Stores the opcode and looks up its descriptor via r600_isa_alu().
	void set_op(unsigned op) {
		this->op = op;
		op_ptr = r600_isa_alu(op);
	}
};
532  
// Decoded representation of a fetch instruction. Some fields are marked
// below as GDS-only.
struct bc_fetch {
	const fetch_op_info * op_ptr;	// descriptor for op, set by set_op()
	unsigned op;

	unsigned bc_frac_mode:1;
	unsigned fetch_whole_quad:1;
	unsigned resource_id:8;

	unsigned src_gpr:7;
	unsigned src_rel:1;
	unsigned src_rel_global:1; /* for GDS ops */
	unsigned src_sel[4];

	unsigned dst_gpr:7;
	unsigned dst_rel:1;
	unsigned dst_rel_global:1; /* for GDS ops */
	unsigned dst_sel[4];

	unsigned alt_const:1;

	unsigned inst_mod:2;
	unsigned resource_index_mode:2;
	unsigned sampler_index_mode:2;

	unsigned coord_type[4];
	unsigned lod_bias:7;

	unsigned offset[3];

	unsigned sampler_id:5;


	unsigned fetch_type:2;
	unsigned mega_fetch_count:6;
	unsigned coalesced_read:1;
	unsigned structured_read:2;
	unsigned lds_req:1;

	unsigned data_format:6;
	unsigned format_comp_all:1;
	unsigned num_format_all:2;
	unsigned semantic_id:8;
	unsigned srf_mode_all:1;
	unsigned use_const_fields:1;

	unsigned const_buf_no_stride:1;
	unsigned endian_swap:2;
	unsigned mega_fetch:1;

	unsigned src2_gpr:7; /* for GDS */
	// Stores the opcode and looks up its descriptor via r600_isa_fetch().
	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
};
585  
586  struct shader_stats {
587  	unsigned	ndw;
588  	unsigned	ngpr;
589  	unsigned	nstack;
590  
591  	unsigned	cf; // clause instructions not included
592  	unsigned	alu;
593  	unsigned	alu_clauses;
594  	unsigned	fetch_clauses;
595  	unsigned	fetch;
596  	unsigned	alu_groups;
597  
598  	unsigned	shaders;		// number of shaders (for accumulated stats)
599  
shader_statsshader_stats600  	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
601  			fetch_clauses(), fetch(), alu_groups(), shaders() {}
602  
603  	void collect(node *n);
604  	void accumulate(shader_stats &s);
605  	void dump();
606  	void dump_diff(shader_stats &s);
607  };
608  
609  class sb_context {
610  
611  public:
612  
613  	shader_stats src_stats, opt_stats;
614  
615  	r600_isa *isa;
616  
617  	sb_hw_chip hw_chip;
618  	sb_hw_class hw_class;
619  
620  	unsigned alu_temp_gprs;
621  	unsigned max_fetch;
622  	bool has_trans;
623  	unsigned vtx_src_num;
624  	unsigned num_slots;
625  	bool uses_mova_gpr;
626  
627  	bool r6xx_gpr_index_workaround;
628  
629  	bool stack_workaround_8xx;
630  	bool stack_workaround_9xx;
631  
632  	unsigned wavefront_size;
633  	unsigned stack_entry_size;
634  
635  	static unsigned dump_pass;
636  	static unsigned dump_stat;
637  
638  	static unsigned dry_run;
639  	static unsigned no_fallback;
640  	static unsigned safe_math;
641  
642  	static unsigned dskip_start;
643  	static unsigned dskip_end;
644  	static unsigned dskip_mode;
645  
sb_context()646  	sb_context() : src_stats(), opt_stats(), isa(0),
647  			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
648  
649  	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
650  
is_r600()651  	bool is_r600() {return hw_class == HW_CLASS_R600;}
is_r700()652  	bool is_r700() {return hw_class == HW_CLASS_R700;}
is_evergreen()653  	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
is_cayman()654  	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
is_egcm()655  	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
656  
needs_8xx_stack_workaround()657  	bool needs_8xx_stack_workaround() {
658  		if (!is_evergreen())
659  			return false;
660  
661  		switch (hw_chip) {
662  		case HW_CHIP_CYPRESS:
663  		case HW_CHIP_JUNIPER:
664  			return false;
665  		default:
666  			return true;
667  		}
668  	}
669  
needs_9xx_stack_workaround()670  	bool needs_9xx_stack_workaround() {
671  		return is_cayman();
672  	}
673  
hw_class_bit()674  	sb_hw_class_bits hw_class_bit() {
675  		switch (hw_class) {
676  		case HW_CLASS_R600:return HB_R6;
677  		case HW_CLASS_R700:return HB_R7;
678  		case HW_CLASS_EVERGREEN:return HB_EG;
679  		case HW_CLASS_CAYMAN:return HB_CM;
680  		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
681  
682  		}
683  	}
684  
cf_opcode(unsigned op)685  	unsigned cf_opcode(unsigned op) {
686  		return r600_isa_cf_opcode(isa->hw_class, op);
687  	}
688  
alu_opcode(unsigned op)689  	unsigned alu_opcode(unsigned op) {
690  		return r600_isa_alu_opcode(isa->hw_class, op);
691  	}
692  
alu_slots(unsigned op)693  	unsigned alu_slots(unsigned op) {
694  		return r600_isa_alu_slots(isa->hw_class, op);
695  	}
696  
alu_slots(const alu_op_info * op_ptr)697  	unsigned alu_slots(const alu_op_info * op_ptr) {
698  		return op_ptr->slots[isa->hw_class];
699  	}
700  
alu_slots_mask(const alu_op_info * op_ptr)701  	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
702  		unsigned mask = 0;
703  		unsigned slot_flags = alu_slots(op_ptr);
704  		if (slot_flags & AF_V)
705  			mask = 0x0F;
706  		if (!is_cayman() && (slot_flags & AF_S))
707  			mask |= 0x10;
708  		return mask;
709  	}
710  
fetch_opcode(unsigned op)711  	unsigned fetch_opcode(unsigned op) {
712  		return r600_isa_fetch_opcode(isa->hw_class, op);
713  	}
714  
is_kcache_sel(unsigned sel)715  	bool is_kcache_sel(unsigned sel) {
716  		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
717  	}
718  
719  	const char * get_hw_class_name();
720  	const char * get_hw_chip_name();
721  
722  };
723  
// Execute the statement(s) `a` only when the corresponding static switch in
// sb_context is non-zero. The do/while(0) wrapper makes each macro usable as
// a single statement.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
726  
// Decoder over a raw bytecode buffer: it stores a context reference plus a
// pointer and a size for the dword array. The decode_* functions are defined
// outside this header.
class bc_decoder {

	sb_context &ctx;

	uint32_t* dw;	// bytecode dwords (not owned; the pointer is only stored)
	unsigned ndw;	// size passed to the constructor (presumably in dwords -- confirm)

public:

	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
		: ctx(sctx), dw(data), ndw(size) {}

	// Each decoder takes an index by reference and fills the given bc_*
	// structure. NOTE(review): presumably i is advanced past the decoded
	// instruction and the return value is an error code -- confirm in the
	// implementation.
	int decode_cf(unsigned &i, bc_cf &bc);
	int decode_alu(unsigned &i, bc_alu &bc);
	int decode_fetch(unsigned &i, bc_fetch &bc);

private:
	int decode_cf_alu(unsigned &i, bc_cf &bc);
	int decode_cf_exp(unsigned &i, bc_cf &bc);
	int decode_cf_mem(unsigned &i, bc_cf &bc);

	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
};
751  
752  // bytecode format definition
753  
754  class hw_encoding_format {
755  	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
756  	hw_encoding_format();
757  protected:
758  	uint32_t value;
759  public:
hw_encoding_format(sb_hw_class_bits hw)760  	hw_encoding_format(sb_hw_class_bits hw)
761  		: hw_target(hw), value(0) {}
hw_encoding_format(uint32_t v,sb_hw_class_bits hw)762  	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
763  		: hw_target(hw), value(v) {}
get_value(sb_hw_class_bits hw)764  	uint32_t get_value(sb_hw_class_bits hw) const {
765  		assert((hw & hw_target) == hw);
766  		return value;
767  	}
768  };
769  
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens a class named fmt_hwset derived from
// hw_encoding_format and tagged with HB_hwset. It provides a default
// constructor (zero value) and a constructor from an encoded dword.
// BC_FORMAT_END closes the class.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

// Shorthand for a format valid on all hardware classes (HB_ALL).
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

#define BC_FORMAT_END(fmt) };

// bytecode format field definition
//
// BC_FIELD generates two members for the bit range [first_bit, last_bit]:
//   name(v)     - masks v to the field width, shifts it into place and ORs
//                 it into value, then returns *this so setters can be
//                 chained. Existing bits of the field are NOT cleared first,
//                 so setting a field twice ORs both values together.
//   get_name()  - extracts the field from value.
// The fmt and shortname arguments are not used by this definition.

#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	} \

// Reserved bit ranges generate no code.
#define BC_RSRVD(fmt, last_bit, first_bit)

// CLAMP macro defined elsewhere interferes with bytecode field name
#undef CLAMP
#include "sb_bc_fmt_def.inc"

// NOTE(review): BC_FORMAT_BEGIN_HW is not #undef'ed here and stays defined
// for the rest of the translation unit -- confirm whether that is intended.
#undef BC_FORMAT_BEGIN
#undef BC_FORMAT_END
#undef BC_FIELD
#undef BC_RSRVD
802  
// Parser state for turning an r600_bytecode / r600_shader pair into a
// `shader` object. Only the constructor and get_shader() are defined here;
// the remaining members are implemented outside this header.
class bc_parser {
	sb_context & ctx;

	bc_decoder *dec;

	r600_bytecode *bc;
	r600_shader *pshader;

	uint32_t *dw;
	unsigned bc_ndw;

	unsigned max_cf;

	shader *sh;

	int error;	// non-zero means get_shader() must not be used (see its assert)

	alu_node *slots[2][5];
	unsigned cgroup;

	typedef std::vector<cf_node*> id_cf_map;
	id_cf_map cf_map;

	typedef std::stack<region_node*> region_stack;
	region_stack loop_stack;

	bool gpr_reladdr;

	// Note: currently relies on input emitting SET_CF in same basic block as uses
	value *cf_index_value[2];
	alu_node *mova;
public:

	// Stores the context and the input pointers; every other member is
	// value-initialized (null pointers, zero counters, empty containers).
	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
		ctx(sctx), dec(), bc(bc), pshader(pshader),
		dw(), bc_ndw(), max_cf(),
		sh(), error(), slots(), cgroup(),
		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }

	int decode();
	int prepare();

	// Returns the shader pointer; asserts that no error has been recorded.
	shader* get_shader() { assert(!error); return sh; }

private:

	int decode_shader();

	int parse_decls();

	int decode_cf(unsigned &i, bool &eop);

	int decode_alu_clause(cf_node *cf);
	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);

	int decode_fetch_clause(cf_node *cf);

	int prepare_ir();
	int prepare_alu_clause(cf_node *cf);
	int prepare_alu_group(cf_node* cf, alu_group_node *g);
	int prepare_fetch_clause(cf_node *cf);

	int prepare_loop(cf_node *c);
	int prepare_if(cf_node *c);

	void save_set_cf_index(value *val, unsigned idx);
	value *get_cf_index_value(unsigned idx);
	void save_mova(alu_node *mova);
	alu_node *get_mova();
};
873  
874  
875  
876  
877  class bytecode {
878  	typedef std::vector<uint32_t> bc_vector;
879  	sb_hw_class_bits hw_class_bit;
880  
881  	bc_vector bc;
882  
883  	unsigned pos;
884  
885  public:
886  
887  	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
hw_class_bit(hw)888  		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
889  
ndw()890  	unsigned ndw() { return bc.size(); }
891  
write_data(uint32_t * dst)892  	void write_data(uint32_t* dst) {
893  		std::copy(bc.begin(), bc.end(), dst);
894  	}
895  
align(unsigned a)896  	void align(unsigned a) {
897  		unsigned size = bc.size();
898  		size = (size + a - 1) & ~(a-1);
899  		bc.resize(size);
900  	}
901  
set_size(unsigned sz)902  	void set_size(unsigned sz) {
903  		assert(sz >= bc.size());
904  		bc.resize(sz);
905  	}
906  
seek(unsigned p)907  	void seek(unsigned p) {
908  		if (p != pos) {
909  			if (p > bc.size()) {
910  				bc.resize(p);
911  			}
912  			pos = p;
913  		}
914  	}
915  
get_pos()916  	unsigned get_pos() { return pos; }
data()917  	uint32_t *data() { return &bc[0]; }
918  
919  	bytecode & operator <<(uint32_t v) {
920  		if (pos == ndw()) {
921  			bc.push_back(v);
922  		} else
923  			bc.at(pos) = v;
924  		++pos;
925  		return *this;
926  	}
927  
928  	bytecode & operator <<(const hw_encoding_format &e) {
929  		*this << e.get_value(hw_class_bit);
930  		return *this;
931  	}
932  
933  	bytecode & operator <<(const bytecode &b) {
934  		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
935  		return *this;
936  	}
937  
at(unsigned dw_id)938  	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
939  };
940  
941  
// Builds a `bytecode` buffer for a shader. The constructor and the build_*
// functions are defined outside this header.
class bc_builder {
	shader &sh;
	sb_context &ctx;
	bytecode bb;	// output buffer returned by get_bytecode()
	int error;	// non-zero means get_bytecode() must not be used (see its assert)

public:

	bc_builder(shader &s);
	int build();
	// Returns the output buffer; asserts that no error has been recorded.
	bytecode& get_bytecode() { assert(!error); return bb; }

private:

	int build_cf(cf_node *n);

	int build_cf_alu(cf_node *n);
	int build_cf_mem(cf_node *n);
	int build_cf_exp(cf_node *n);

	int build_alu_clause(cf_node *n);
	int build_alu_group(alu_group_node *n);
	int build_alu(alu_node *n);

	int build_fetch_clause(cf_node *n);
	int build_fetch_tex(fetch_node *n);
	int build_fetch_vtx(fetch_node *n);
};
970  
971  } // namespace r600_sb
972  
973  #endif /* SB_BC_H_ */
974