• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef SB_BC_H_
28 #define SB_BC_H_
29 
#include <stdint.h>
#include "r600_isa.h"

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>
#include <stack>
37 
38 struct r600_bytecode;
39 struct r600_shader;
40 
41 namespace r600_sb {
42 
43 class hw_encoding_format;
44 class node;
45 class alu_node;
46 class cf_node;
47 class fetch_node;
48 class alu_group_node;
49 class region_node;
50 class shader;
51 class value;
52 
/**
 * Abstract text sink used for dumps and logging.
 *
 * Subclasses implement write(); every operator<< and print_* helper formats
 * its argument into a local buffer and forwards the text to write().
 */
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base: make destruction through a base pointer well-defined.
	virtual ~sb_ostream() {}

	/// Emit the NUL-terminated string @p s to the underlying sink.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	sb_ostream& operator <<(char c) {
		// Same result as formatting with "%c": one character plus terminator.
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers below pass the width through the '*' specifier
	// instead of building a format string in a small buffer, so an unusually
	// large width can no longer overflow anything. Output is limited to
	// 255 characters by the local buffer.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// %x expects an unsigned argument.
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139 
140 class sb_ostringstream : public sb_ostream {
141 	std::string data;
142 public:
sb_ostringstream()143 	sb_ostringstream() : data() {}
144 
write(const char * s)145 	virtual void write(const char *s) {
146 		data += s;
147 	}
148 
clear()149 	void clear() { data.clear(); }
150 
c_str()151 	const char* c_str() { return data.c_str(); }
str()152 	std::string& str() { return data; }
153 };
154 
155 class sb_log : public sb_ostream {
156 	FILE *o;
157 public:
sb_log()158 	sb_log() : o(stderr) {}
159 
write(const char * s)160 	virtual void write(const char *s) {
161 		fputs(s, o);
162 	}
163 };
164 
165 extern sb_log sblog;
166 
// Shader target kinds. Values are consecutive starting at 0.
enum shader_target
{
	TARGET_UNKNOWN = 0,
	TARGET_VS      = 1,
	TARGET_ES      = 2,
	TARGET_PS      = 3,
	TARGET_GS      = 4,
	TARGET_GS_COPY = 5,
	TARGET_COMPUTE = 6,
	TARGET_FETCH   = 7,
	TARGET_HS      = 8,
	TARGET_LS      = 9,

	// Number of targets; must stay last.
	TARGET_NUM
};
182 
// One bit per hardware class, plus the combinations used as masks.
enum sb_hw_class_bits
{
	HB_R6 = 0x1,
	HB_R7 = 0x2,
	HB_EG = 0x4,
	HB_CM = 0x8,

	HB_R6R7   = HB_R6 | HB_R7,
	HB_EGCM   = HB_EG | HB_CM,
	HB_R6R7EG = HB_R6R7 | HB_EG,
	HB_R7EGCM = HB_R7 | HB_EGCM,

	HB_ALL    = HB_R6R7 | HB_EGCM
};
197 
// Chip identifiers. Values are consecutive starting at 0.
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN = 0,
	HW_CHIP_R600    = 1,
	HW_CHIP_RV610   = 2,
	HW_CHIP_RV630   = 3,
	HW_CHIP_RV670   = 4,
	HW_CHIP_RV620   = 5,
	HW_CHIP_RV635   = 6,
	HW_CHIP_RS780   = 7,
	HW_CHIP_RS880   = 8,
	HW_CHIP_RV770   = 9,
	HW_CHIP_RV730   = 10,
	HW_CHIP_RV710   = 11,
	HW_CHIP_RV740   = 12,
	HW_CHIP_CEDAR   = 13,
	HW_CHIP_REDWOOD = 14,
	HW_CHIP_JUNIPER = 15,
	HW_CHIP_CYPRESS = 16,
	HW_CHIP_HEMLOCK = 17,
	HW_CHIP_PALM    = 18,
	HW_CHIP_SUMO    = 19,
	HW_CHIP_SUMO2   = 20,
	HW_CHIP_BARTS   = 21,
	HW_CHIP_TURKS   = 22,
	HW_CHIP_CAICOS  = 23,
	HW_CHIP_CAYMAN  = 24,
	HW_CHIP_ARUBA   = 25
};
227 
// Hardware classes. The numeric order matters: sb_context::is_egcm()
// tests "hw_class >= HW_CLASS_EVERGREEN".
enum sb_hw_class
{
	HW_CLASS_UNKNOWN   = 0,
	HW_CLASS_R600      = 1,
	HW_CLASS_R700      = 2,
	HW_CLASS_EVERGREEN = 3,
	HW_CLASS_CAYMAN    = 4
};
236 
// ALU slot indices: four channel slots followed by the trans slot.
enum alu_slots {
	SLOT_X,          // 0
	SLOT_Y,          // 1
	SLOT_Z,          // 2
	SLOT_W,          // 3
	SLOT_TRANS       // 4
};
244 
// Assorted size limits.
enum misc_consts {
	MAX_ALU_LITERALS = 4,
	MAX_ALU_SLOTS    = 128,
	MAX_GPR          = 128,
	MAX_CHAN         = 4
};
252 
// Special ALU source selector values. All values are explicit; the numbers
// 225-226 and 239 have no enumerator here.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A            = 219,
	ALU_SRC_LDS_OQ_B            = 220,
	ALU_SRC_LDS_OQ_A_POP        = 221,
	ALU_SRC_LDS_OQ_B_POP        = 222,
	ALU_SRC_LDS_DIRECT_A        = 223,
	ALU_SRC_LDS_DIRECT_B        = 224,

	ALU_SRC_TIME_HI             = 227,
	ALU_SRC_TIME_LO             = 228,
	ALU_SRC_MASK_HI             = 229,
	ALU_SRC_MASK_LO             = 230,
	ALU_SRC_HW_WAVE_ID          = 231,
	ALU_SRC_SIMD_ID             = 232,
	ALU_SRC_SE_ID               = 233,
	ALU_SRC_HW_THREADGRP_ID     = 234,
	ALU_SRC_WAVE_ID_IN_GRP      = 235,
	ALU_SRC_NUM_THREADGRP_WAVES = 236,
	ALU_SRC_HW_ALU_ODD          = 237,
	ALU_SRC_LOOP_IDX            = 238,

	ALU_SRC_PARAM_BASE_ADDR     = 240,
	ALU_SRC_NEW_PRIM_MASK       = 241,
	ALU_SRC_PRIM_MASK_HI        = 242,
	ALU_SRC_PRIM_MASK_LO        = 243,
	ALU_SRC_1_DBL_L             = 244,
	ALU_SRC_1_DBL_M             = 245,
	ALU_SRC_0_5_DBL_L           = 246,
	ALU_SRC_0_5_DBL_M           = 247,
	ALU_SRC_0                   = 248,
	ALU_SRC_1                   = 249,
	ALU_SRC_1_INT               = 250,
	ALU_SRC_M_1_INT             = 251,
	ALU_SRC_0_5                 = 252,
	ALU_SRC_LITERAL             = 253,
	ALU_SRC_PV                  = 254,
	ALU_SRC_PS                  = 255,

	ALU_SRC_PARAM_OFFSET        = 448
};
292 
// Predicate select values; the value 1 is reserved and has no enumerator.
enum alu_predicate_select
{
	PRED_SEL_OFF = 0,
	/* RESERVED  = 1, */
	PRED_SEL_0   = 2,
	PRED_SEL_1   = 3
};
300 
301 
// Output modifier values.
enum alu_omod {
	OMOD_OFF,        // 0
	OMOD_M2,         // 1
	OMOD_M4,         // 2
	OMOD_D2          // 3
};
308 
// ALU index mode values.
enum alu_index_mode {
	INDEX_AR_X,             // 0
	INDEX_AR_Y_R600,        // 1
	INDEX_AR_Z_R600,        // 2
	INDEX_AR_W_R600,        // 3

	INDEX_LOOP,             // 4
	INDEX_GLOBAL,           // 5
	INDEX_GLOBAL_AR_X       // 6
};
319 
// Cayman MOVA destination selectors. Values are consecutive starting at 0.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X = 0,
	CM_MOVADST_PC   = 1,
	CM_MOVADST_IDX0 = 2,
	CM_MOVADST_IDX1 = 3,
	CM_MOVADST_CG0  = 4,	// clause-global byte 0
	CM_MOVADST_CG1  = 5,
	CM_MOVADST_CG2  = 6,
	CM_MOVADST_CG3  = 7
};
330 
// Cayman execute-mask operations. Values are consecutive starting at 0.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE = 0,
	CM_EMO_BREAK      = 1,
	CM_EMO_CONTINUE   = 2,
	CM_EMO_KILL       = 3
};
337 
338 
// Export types. Values are consecutive starting at 0.
enum cf_exp_type {
	EXP_PIXEL = 0,
	EXP_POS   = 1,
	EXP_PARAM = 2,

	// Number of export types; must stay last.
	EXP_TYPE_COUNT
};
346 
// Memory write types. Values are consecutive starting at 0.
enum cf_mem_type {
	MEM_WRITE         = 0,
	MEM_WRITE_IND     = 1,
	MEM_WRITE_ACK     = 2,
	MEM_WRITE_IND_ACK = 3
};
353 
354 
// Kcache lock modes; compared against bc_kcache::mode.
enum alu_kcache_mode {
	KC_LOCK_NONE = 0,
	KC_LOCK_1    = 1,
	KC_LOCK_2    = 2,
	KC_LOCK_LOOP = 3
};
361 
// Kcache index modes; compared against bc_kcache::index_mode.
enum alu_kcache_index_mode {
	KC_INDEX_NONE    = 0,
	KC_INDEX_0       = 1,
	KC_INDEX_1       = 2,
	KC_INDEX_INVALID = 3
};
368 
// Channel select values; the value 6 is reserved and has no enumerator.
enum chan_select {
	SEL_X,              // 0
	SEL_Y,              // 1
	SEL_Z,              // 2
	SEL_W,              // 3
	SEL_0,              // 4
	SEL_1,              // 5
	/* RESERVED = 6, */
	SEL_MASK = 7
};
379 
// Bank swizzle values. The VEC_* and SCL_* groups both start at 0, so their
// numeric values overlap.
enum bank_swizzle {
	// VEC_* group
	VEC_012,            // 0
	VEC_021,            // 1
	VEC_120,            // 2
	VEC_102,            // 3
	VEC_201,            // 4
	VEC_210,            // 5

	VEC_NUM,            // 6: number of VEC_* values

	// SCL_* group, restarting at 0
	SCL_210 = 0,
	SCL_122,            // 1
	SCL_212,            // 2
	SCL_221,            // 3

	SCL_NUM             // 4: number of SCL_* values
};
398 
// Scheduler queue identifiers. Values are consecutive starting at 0.
enum sched_queue_id {
	SQ_CF  = 0,
	SQ_ALU = 1,
	SQ_TEX = 2,
	SQ_VTX = 3,

	// Number of queues; must stay last.
	SQ_NUM
};
407 
/**
 * 32-bit literal viewable as a signed integer, an unsigned integer or a float.
 *
 * The three members share storage. Constructors are intentionally implicit so
 * plain numeric values convert to a literal.
 */
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	literal(int32_t i = 0) : i(i) {}
	literal(uint32_t u) : u(u) {}
	literal(float f) : f(f) {}
	// A double is stored as single precision; the narrowing is deliberate.
	literal(double f) : f(static_cast<float>(f)) {}

	/// Raw 32-bit pattern.
	operator uint32_t() const { return u; }

	// Comparisons are const so they are also usable on const literals.
	/// Bitwise equality with another literal.
	bool operator ==(literal l) const { return u == l.u; }
	/// Equality of the signed view with @p v_int.
	bool operator ==(int v_int) const { return i == v_int; }
	/// Equality of the unsigned view with @p v_uns.
	bool operator ==(unsigned v_uns) const { return u == v_uns; }
};
424 
// Description of one kcache set of a CF instruction (see bc_cf::kc).
struct bc_kcache {
	unsigned mode;          // compared against alu_kcache_mode values
	unsigned bank;
	unsigned addr;
	unsigned index_mode;    // compared against alu_kcache_index_mode values
};
431 
432 // TODO optimize bc structures
433 
434 struct bc_cf {
435 
436 	bc_kcache kc[4];
437 
438 	unsigned id;
439 
440 
441 	const cf_op_info * op_ptr;
442 	unsigned op;
443 
444 	unsigned addr:32;
445 
446 	unsigned alt_const:1;
447 	unsigned uses_waterfall:1;
448 
449 	unsigned barrier:1;
450 	unsigned count:7;
451 	unsigned pop_count:3;
452 	unsigned call_count:6;
453 	unsigned whole_quad_mode:1;
454 	unsigned valid_pixel_mode:1;
455 
456 	unsigned jumptable_sel:3;
457 	unsigned cf_const:5;
458 	unsigned cond:2;
459 	unsigned end_of_program:1;
460 
461 	unsigned array_base:13;
462 	unsigned elem_size:2;
463 	unsigned index_gpr:7;
464 	unsigned rw_gpr:7;
465 	unsigned rw_rel:1;
466 	unsigned type:2;
467 
468 	unsigned burst_count:4;
469 	unsigned mark:1;
470 	unsigned sel[4];
471 
472 	unsigned array_size:12;
473 	unsigned comp_mask:4;
474 
475 	unsigned rat_id:4;
476 	unsigned rat_inst:6;
477 	unsigned rat_index_mode:2;
478 
set_opbc_cf479 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
480 
is_alu_extendedbc_cf481 	bool is_alu_extended() {
482 		assert(op_ptr->flags & CF_ALU);
483 		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
484 			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
485 			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
486 	}
487 
488 };
489 
490 struct bc_alu_src {
491 	unsigned sel:9;
492 	unsigned chan:2;
493 	unsigned neg:1;
494 	unsigned abs:1;
495 	unsigned rel:1;
496 	literal value;
497 };
498 
499 struct bc_alu {
500 	const alu_op_info * op_ptr;
501 	unsigned op;
502 
503 	bc_alu_src src[3];
504 
505 	unsigned dst_gpr:7;
506 	unsigned dst_chan:2;
507 	unsigned dst_rel:1;
508 	unsigned clamp:1;
509 	unsigned omod:2;
510 	unsigned bank_swizzle:3;
511 
512 	unsigned index_mode:3;
513 	unsigned last:1;
514 	unsigned pred_sel:2;
515 
516 	unsigned fog_merge:1;
517 	unsigned write_mask:1;
518 	unsigned update_exec_mask:1;
519 	unsigned update_pred:1;
520 
521 	unsigned slot:3;
522 
523 	unsigned lds_idx_offset:6;
524 
525 	alu_op_flags slot_flags;
526 
set_opbc_alu527 	void set_op(unsigned op) {
528 		this->op = op;
529 		op_ptr = r600_isa_alu(op);
530 	}
531 };
532 
533 struct bc_fetch {
534 	const fetch_op_info * op_ptr;
535 	unsigned op;
536 
537 	unsigned bc_frac_mode:1;
538 	unsigned fetch_whole_quad:1;
539 	unsigned resource_id:8;
540 
541 	unsigned src_gpr:7;
542 	unsigned src_rel:1;
543 	unsigned src_rel_global:1; /* for GDS ops */
544 	unsigned src_sel[4];
545 
546 	unsigned dst_gpr:7;
547 	unsigned dst_rel:1;
548 	unsigned dst_rel_global:1; /* for GDS ops */
549 	unsigned dst_sel[4];
550 
551 	unsigned alt_const:1;
552 
553 	unsigned inst_mod:2;
554 	unsigned resource_index_mode:2;
555 	unsigned sampler_index_mode:2;
556 
557 	unsigned coord_type[4];
558 	unsigned lod_bias:7;
559 
560 	unsigned offset[3];
561 
562 	unsigned sampler_id:5;
563 
564 
565 	unsigned fetch_type:2;
566 	unsigned mega_fetch_count:6;
567 	unsigned coalesced_read:1;
568 	unsigned structured_read:2;
569 	unsigned lds_req:1;
570 
571 	unsigned data_format:6;
572 	unsigned format_comp_all:1;
573 	unsigned num_format_all:2;
574 	unsigned semantic_id:8;
575 	unsigned srf_mode_all:1;
576 	unsigned use_const_fields:1;
577 
578 	unsigned const_buf_no_stride:1;
579 	unsigned endian_swap:2;
580 	unsigned mega_fetch:1;
581 
582 	unsigned src2_gpr:7; /* for GDS */
set_opbc_fetch583 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
584 };
585 
586 struct shader_stats {
587 	unsigned	ndw;
588 	unsigned	ngpr;
589 	unsigned	nstack;
590 
591 	unsigned	cf; // clause instructions not included
592 	unsigned	alu;
593 	unsigned	alu_clauses;
594 	unsigned	fetch_clauses;
595 	unsigned	fetch;
596 	unsigned	alu_groups;
597 
598 	unsigned	shaders;		// number of shaders (for accumulated stats)
599 
shader_statsshader_stats600 	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
601 			fetch_clauses(), fetch(), alu_groups(), shaders() {}
602 
603 	void collect(node *n);
604 	void accumulate(shader_stats &s);
605 	void dump();
606 	void dump_diff(shader_stats &s);
607 };
608 
609 class sb_context {
610 
611 public:
612 
613 	shader_stats src_stats, opt_stats;
614 
615 	r600_isa *isa;
616 
617 	sb_hw_chip hw_chip;
618 	sb_hw_class hw_class;
619 
620 	unsigned alu_temp_gprs;
621 	unsigned max_fetch;
622 	bool has_trans;
623 	unsigned vtx_src_num;
624 	unsigned num_slots;
625 	bool uses_mova_gpr;
626 
627 	bool r6xx_gpr_index_workaround;
628 
629 	bool stack_workaround_8xx;
630 	bool stack_workaround_9xx;
631 
632 	unsigned wavefront_size;
633 	unsigned stack_entry_size;
634 
635 	static unsigned dump_pass;
636 	static unsigned dump_stat;
637 
638 	static unsigned dry_run;
639 	static unsigned no_fallback;
640 	static unsigned safe_math;
641 
642 	static unsigned dskip_start;
643 	static unsigned dskip_end;
644 	static unsigned dskip_mode;
645 
sb_context()646 	sb_context() : src_stats(), opt_stats(), isa(0),
647 			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
648 
649 	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
650 
is_r600()651 	bool is_r600() {return hw_class == HW_CLASS_R600;}
is_r700()652 	bool is_r700() {return hw_class == HW_CLASS_R700;}
is_evergreen()653 	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
is_cayman()654 	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
is_egcm()655 	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
656 
needs_8xx_stack_workaround()657 	bool needs_8xx_stack_workaround() {
658 		if (!is_evergreen())
659 			return false;
660 
661 		switch (hw_chip) {
662 		case HW_CHIP_CYPRESS:
663 		case HW_CHIP_JUNIPER:
664 			return false;
665 		default:
666 			return true;
667 		}
668 	}
669 
needs_9xx_stack_workaround()670 	bool needs_9xx_stack_workaround() {
671 		return is_cayman();
672 	}
673 
hw_class_bit()674 	sb_hw_class_bits hw_class_bit() {
675 		switch (hw_class) {
676 		case HW_CLASS_R600:return HB_R6;
677 		case HW_CLASS_R700:return HB_R7;
678 		case HW_CLASS_EVERGREEN:return HB_EG;
679 		case HW_CLASS_CAYMAN:return HB_CM;
680 		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
681 
682 		}
683 	}
684 
cf_opcode(unsigned op)685 	unsigned cf_opcode(unsigned op) {
686 		return r600_isa_cf_opcode(isa->hw_class, op);
687 	}
688 
alu_opcode(unsigned op)689 	unsigned alu_opcode(unsigned op) {
690 		return r600_isa_alu_opcode(isa->hw_class, op);
691 	}
692 
alu_slots(unsigned op)693 	unsigned alu_slots(unsigned op) {
694 		return r600_isa_alu_slots(isa->hw_class, op);
695 	}
696 
alu_slots(const alu_op_info * op_ptr)697 	unsigned alu_slots(const alu_op_info * op_ptr) {
698 		return op_ptr->slots[isa->hw_class];
699 	}
700 
alu_slots_mask(const alu_op_info * op_ptr)701 	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
702 		unsigned mask = 0;
703 		unsigned slot_flags = alu_slots(op_ptr);
704 		if (slot_flags & AF_V)
705 			mask = 0x0F;
706 		if (!is_cayman() && (slot_flags & AF_S))
707 			mask |= 0x10;
708 		return mask;
709 	}
710 
fetch_opcode(unsigned op)711 	unsigned fetch_opcode(unsigned op) {
712 		return r600_isa_fetch_opcode(isa->hw_class, op);
713 	}
714 
is_kcache_sel(unsigned sel)715 	bool is_kcache_sel(unsigned sel) {
716 		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
717 	}
718 
719 	const char * get_hw_class_name();
720 	const char * get_hw_chip_name();
721 
722 };
723 
// Run the statement(s) 'a' only when the corresponding sb_context debug
// switch is non-zero. Wrapped in do/while(0) so a use behaves as a single
// statement.
#define SB_DUMP_STAT(a) \
	do { \
		if (sb_context::dump_stat) { a } \
	} while (0)
#define SB_DUMP_PASS(a) \
	do { \
		if (sb_context::dump_pass) { a } \
	} while (0)
726 
727 class bc_decoder {
728 
729 	sb_context &ctx;
730 
731 	uint32_t* dw;
732 	unsigned ndw;
733 
734 public:
735 
bc_decoder(sb_context & sctx,uint32_t * data,unsigned size)736 	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
737 		: ctx(sctx), dw(data), ndw(size) {}
738 
739 	int decode_cf(unsigned &i, bc_cf &bc);
740 	int decode_alu(unsigned &i, bc_alu &bc);
741 	int decode_fetch(unsigned &i, bc_fetch &bc);
742 
743 private:
744 	int decode_cf_alu(unsigned &i, bc_cf &bc);
745 	int decode_cf_exp(unsigned &i, bc_cf &bc);
746 	int decode_cf_mem(unsigned &i, bc_cf &bc);
747 
748 	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
749 	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
750 };
751 
752 // bytecode format definition
753 
754 class hw_encoding_format {
755 	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
756 	hw_encoding_format();
757 protected:
758 	uint32_t value;
759 public:
hw_encoding_format(sb_hw_class_bits hw)760 	hw_encoding_format(sb_hw_class_bits hw)
761 		: hw_target(hw), value(0) {}
hw_encoding_format(uint32_t v,sb_hw_class_bits hw)762 	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
763 		: hw_target(hw), value(v) {}
get_value(sb_hw_class_bits hw)764 	uint32_t get_value(sb_hw_class_bits hw) const {
765 		assert((hw & hw_target) == hw);
766 		return value;
767 	}
768 };
769 
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens a class named fmt_hwset derived from
// hw_encoding_format and tagged with HB_hwset. It deliberately leaves the
// class body open so that BC_FIELD lines can follow; BC_FORMAT_END closes it.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format { \
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

// Format valid for all hardware classes.
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

// Close the class opened by BC_FORMAT_BEGIN / BC_FORMAT_BEGIN_HW.
#define BC_FORMAT_END(fmt) };
780 
781 // bytecode format field definition
782 
// BC_FIELD generates two members for the bit range [first_bit, last_bit]:
//   name(v)     - ORs the masked v into 'value' (it does not clear bits that
//                 are already set) and returns *this for chaining
//   get_name()  - extracts the field from 'value'
// The mask uses 1ull so a full 32-bit field is handled.
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v & ((1ull << ((last_bit) - (first_bit) + 1)) - 1)) << (first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value >> (first_bit)) & ((1ull << ((last_bit) - (first_bit) + 1)) - 1); \
	}

// Reserved bit ranges generate nothing.
#define BC_RSRVD(fmt, last_bit, first_bit)
793 
794 // CLAMP macro defined elsewhere interferes with bytecode field name
795 #undef CLAMP
796 #include "sb_bc_fmt_def.inc"
797 
798 #undef BC_FORMAT_BEGIN
799 #undef BC_FORMAT_END
800 #undef BC_FIELD
801 #undef BC_RSRVD
802 
803 class bc_parser {
804 	sb_context & ctx;
805 
806 	bc_decoder *dec;
807 
808 	r600_bytecode *bc;
809 	r600_shader *pshader;
810 
811 	uint32_t *dw;
812 	unsigned bc_ndw;
813 
814 	unsigned max_cf;
815 
816 	shader *sh;
817 
818 	int error;
819 
820 	alu_node *slots[2][5];
821 	unsigned cgroup;
822 
823 	typedef std::vector<cf_node*> id_cf_map;
824 	id_cf_map cf_map;
825 
826 	typedef std::stack<region_node*> region_stack;
827 	region_stack loop_stack;
828 
829 	bool gpr_reladdr;
830 
831 	// Note: currently relies on input emitting SET_CF in same basic block as uses
832 	value *cf_index_value[2];
833 	alu_node *mova;
834 public:
835 
bc_parser(sb_context & sctx,r600_bytecode * bc,r600_shader * pshader)836 	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
837 		ctx(sctx), dec(), bc(bc), pshader(pshader),
838 		dw(), bc_ndw(), max_cf(),
839 		sh(), error(), slots(), cgroup(),
840 		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
841 
842 	int decode();
843 	int prepare();
844 
get_shader()845 	shader* get_shader() { assert(!error); return sh; }
846 
847 private:
848 
849 	int decode_shader();
850 
851 	int parse_decls();
852 
853 	int decode_cf(unsigned &i, bool &eop);
854 
855 	int decode_alu_clause(cf_node *cf);
856 	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
857 
858 	int decode_fetch_clause(cf_node *cf);
859 
860 	int prepare_ir();
861 	int prepare_alu_clause(cf_node *cf);
862 	int prepare_alu_group(cf_node* cf, alu_group_node *g);
863 	int prepare_fetch_clause(cf_node *cf);
864 
865 	int prepare_loop(cf_node *c);
866 	int prepare_if(cf_node *c);
867 
868 	void save_set_cf_index(value *val, unsigned idx);
869 	value *get_cf_index_value(unsigned idx);
870 	void save_mova(alu_node *mova);
871 	alu_node *get_mova();
872 };
873 
874 
875 
876 
877 class bytecode {
878 	typedef std::vector<uint32_t> bc_vector;
879 	sb_hw_class_bits hw_class_bit;
880 
881 	bc_vector bc;
882 
883 	unsigned pos;
884 
885 public:
886 
887 	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
hw_class_bit(hw)888 		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
889 
ndw()890 	unsigned ndw() { return bc.size(); }
891 
write_data(uint32_t * dst)892 	void write_data(uint32_t* dst) {
893 		std::copy(bc.begin(), bc.end(), dst);
894 	}
895 
align(unsigned a)896 	void align(unsigned a) {
897 		unsigned size = bc.size();
898 		size = (size + a - 1) & ~(a-1);
899 		bc.resize(size);
900 	}
901 
set_size(unsigned sz)902 	void set_size(unsigned sz) {
903 		assert(sz >= bc.size());
904 		bc.resize(sz);
905 	}
906 
seek(unsigned p)907 	void seek(unsigned p) {
908 		if (p != pos) {
909 			if (p > bc.size()) {
910 				bc.resize(p);
911 			}
912 			pos = p;
913 		}
914 	}
915 
get_pos()916 	unsigned get_pos() { return pos; }
data()917 	uint32_t *data() { return &bc[0]; }
918 
919 	bytecode & operator <<(uint32_t v) {
920 		if (pos == ndw()) {
921 			bc.push_back(v);
922 		} else
923 			bc.at(pos) = v;
924 		++pos;
925 		return *this;
926 	}
927 
928 	bytecode & operator <<(const hw_encoding_format &e) {
929 		*this << e.get_value(hw_class_bit);
930 		return *this;
931 	}
932 
933 	bytecode & operator <<(const bytecode &b) {
934 		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
935 		return *this;
936 	}
937 
at(unsigned dw_id)938 	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
939 };
940 
941 
942 class bc_builder {
943 	shader &sh;
944 	sb_context &ctx;
945 	bytecode bb;
946 	int error;
947 
948 public:
949 
950 	bc_builder(shader &s);
951 	int build();
get_bytecode()952 	bytecode& get_bytecode() { assert(!error); return bb; }
953 
954 private:
955 
956 	int build_cf(cf_node *n);
957 
958 	int build_cf_alu(cf_node *n);
959 	int build_cf_mem(cf_node *n);
960 	int build_cf_exp(cf_node *n);
961 
962 	int build_alu_clause(cf_node *n);
963 	int build_alu_group(alu_group_node *n);
964 	int build_alu(alu_node *n);
965 
966 	int build_fetch_clause(cf_node *n);
967 	int build_fetch_tex(fetch_node *n);
968 	int build_fetch_vtx(fetch_node *n);
969 };
970 
971 } // namespace r600_sb
972 
973 #endif /* SB_BC_H_ */
974