• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef R600_SB_IR_H_
28 #define R600_SB_IR_H_
29 
30 #include <algorithm>
31 #include <stdint.h>
32 #include <vector>
33 #include <set>
34 #include <algorithm>
35 
36 #include "sb_bc.h"
37 
38 namespace r600_sb {
39 
40 enum special_regs {
41 	SV_ALU_PRED = 128,
42 	SV_EXEC_MASK,
43 	SV_AR_INDEX,
44 	SV_VALID_MASK,
45 	SV_GEOMETRY_EMIT,
46 	SV_LDS_RW,
47 	SV_LDS_OQA,
48 	SV_LDS_OQB,
49 };
50 
51 class node;
52 class value;
53 class shader;
54 
55 struct sel_chan
56 {
57 	unsigned id;
58 
idsel_chan59 	sel_chan(unsigned id = 0) : id(id) {}
sel_chansel_chan60 	sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
61 
selsel_chan62 	unsigned sel() const { return sel(id); }
chansel_chan63 	unsigned chan() const {return chan(id); }
64 	operator unsigned() const {return id;}
65 
selsel_chan66 	static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
chansel_chan67 	static unsigned chan(unsigned idx) { return (idx-1) & 3; }
68 
sel_chansel_chan69 	sel_chan(unsigned bank, unsigned index,
70 			 unsigned chan, alu_kcache_index_mode index_mode)
71 		: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
kcache_index_modesel_chan72 	unsigned kcache_index_mode() const { return sel() >> 28; }
kcache_selsel_chan73 	unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
kcache_banksel_chan74 	unsigned kcache_bank() const { return kcache_sel() >> 12; }
75 };
76 
77 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
78 	static const char * ch = "xyzw";
79 	o << r.sel() << "." << ch[r.chan()];
80 	return o;
81 }
82 
83 typedef std::vector<value*>  vvec;
84 
85 class sb_pool {
86 protected:
87 	static const unsigned SB_POOL_ALIGN = 8;
88 	static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16);
89 
90 	typedef std::vector<void*> block_vector;
91 
92 	unsigned block_size;
93 	block_vector blocks;
94 	unsigned total_size;
95 
96 public:
97 	sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE)
block_size(block_size)98 		: block_size(block_size), blocks(), total_size() {}
99 
~sb_pool()100 	virtual ~sb_pool() { free_all(); }
101 
102 	void* allocate(unsigned sz);
103 
104 protected:
105 	void free_all();
106 };
107 
108 template <typename V, typename Comp = std::less<V> >
109 class sb_set {
110 	typedef std::vector<V> data_vector;
111 	data_vector vec;
112 public:
113 
114 	typedef typename data_vector::iterator iterator;
115 	typedef typename data_vector::const_iterator const_iterator;
116 
sb_set()117 	sb_set() : vec() {}
~sb_set()118 	~sb_set() {  }
119 
begin()120 	iterator begin() { return vec.begin(); }
end()121 	iterator end() { return vec.end(); }
begin()122 	const_iterator begin() const { return vec.begin(); }
end()123 	const_iterator end() const { return vec.end(); }
124 
add_set(const sb_set & s)125 	void add_set(const sb_set& s) {
126 		data_vector t;
127 		t.reserve(vec.size() + s.vec.size());
128 		std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(),
129 		          std::inserter(t, t.begin()), Comp());
130 		vec.swap(t);
131 	}
132 
lower_bound(const V & v)133 	iterator lower_bound(const V& v) {
134 		return std::lower_bound(vec.begin(), vec.end(), v, Comp());
135 	}
136 
insert(const V & v)137 	std::pair<iterator, bool> insert(const V& v) {
138 		iterator P = lower_bound(v);
139 		if (P != vec.end() && is_equal(*P, v))
140 			return std::make_pair(P, false);
141 		return std::make_pair(vec.insert(P, v), true);
142 	}
143 
erase(const V & v)144 	unsigned erase(const V&  v) {
145 		iterator P = lower_bound(v);
146 		if (P == vec.end() || !is_equal(*P, v))
147 			return 0;
148 		vec.erase(P);
149 		return 1;
150 	}
151 
clear()152 	void clear() { vec.clear(); }
153 
empty()154 	bool empty() { return vec.empty(); }
155 
is_equal(const V & v1,const V & v2)156 	bool is_equal(const V& v1, const V& v2) {
157 		return !Comp()(v1, v2) && !Comp()(v2, v1);
158 	}
159 
find(const V & v)160 	iterator find(const V& v) {
161 		iterator P = lower_bound(v);
162 		return (P != vec.end() && is_equal(*P, v)) ? P : vec.end();
163 	}
164 
size()165 	unsigned size() { return vec.size(); }
erase(iterator I)166 	void erase(iterator I) { vec.erase(I); }
167 };
168 
169 template <typename K, typename V, typename KComp = std::less<K> >
170 class sb_map {
171 	typedef std::pair<K, V> datatype;
172 
173 	struct Comp {
operatorComp174 		bool operator()(const datatype &v1, const datatype &v2) {
175 			return KComp()(v1.first, v2.first);
176 		}
177 	};
178 
179 	typedef sb_set<datatype, Comp> dataset;
180 
181 	dataset set;
182 
183 public:
184 
sb_map()185 	sb_map() : set() {}
186 
187 	typedef typename dataset::iterator iterator;
188 
begin()189 	iterator begin() { return set.begin(); }
end()190 	iterator end() { return set.end(); }
191 
clear()192 	void clear() { set.clear(); }
193 
194 	V& operator[](const K& key) {
195 		datatype P = std::make_pair(key, V());
196 		iterator F = set.find(P);
197 		if (F == set.end()) {
198 			return (*(set.insert(P).first)).second;
199 		} else {
200 			return (*F).second;
201 		}
202 	}
203 
insert(const datatype & d)204 	std::pair<iterator, bool> insert(const datatype& d) {
205 		return set.insert(d);
206 	}
207 
find(const K & key)208 	iterator find(const K& key) {
209 		return set.find(std::make_pair(key, V()));
210 	}
211 
erase(const K & key)212 	unsigned erase(const K& key) {
213 		return set.erase(std::make_pair(key, V()));
214 	}
215 
erase(iterator I)216 	void erase(iterator I) {
217 		set.erase(I);
218 	}
219 };
220 
221 class sb_bitset {
222 	typedef uint32_t basetype;
223 	static const unsigned bt_bits = sizeof(basetype) << 3;
224 	std::vector<basetype> data;
225 	unsigned bit_size;
226 
227 public:
228 
sb_bitset()229 	sb_bitset() : data(), bit_size() {}
230 
231 	bool get(unsigned id);
232 	void set(unsigned id, bool bit = true);
233 	bool set_chk(unsigned id, bool bit = true);
234 
235 	void clear();
236 	void resize(unsigned size);
237 
size()238 	unsigned size() { return bit_size; }
239 
240 	unsigned find_bit(unsigned start = 0);
241 
242 	void swap(sb_bitset & bs2);
243 
244 	bool operator==(const sb_bitset &bs2);
245 	bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); }
246 
247 	sb_bitset& operator|=(const sb_bitset &bs2) {
248 		if (bit_size < bs2.bit_size) {
249 			resize(bs2.bit_size);
250 		}
251 
252 		for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
253 				++i) {
254 			data[i] |= bs2.data[i];
255 		}
256 		return *this;
257 	}
258 
259 	sb_bitset& operator&=(const sb_bitset &bs2);
260 	sb_bitset& mask(const sb_bitset &bs2);
261 
262 	friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) {
263 			sb_bitset nbs(b1);
264 			nbs |= b2;
265 			return nbs;
266 	}
267 };
268 
269 enum value_kind {
270 	VLK_REG,
271 	VLK_REL_REG,
272 	VLK_SPECIAL_REG,
273 	VLK_TEMP,
274 
275 	VLK_CONST,
276 	VLK_KCACHE,
277 	VLK_PARAM,
278 	VLK_SPECIAL_CONST,
279 
280 	VLK_UNDEF
281 };
282 
283 
284 
285 class sb_value_pool : protected sb_pool {
286 	unsigned aligned_elt_size;
287 
288 public:
289 	sb_value_pool(unsigned elt_size, unsigned block_elts = 256)
290 		: sb_pool(block_elts * (aligned_elt_size = ((elt_size +
291 				SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {}
292 
~sb_value_pool()293 	virtual ~sb_value_pool() { delete_all(); }
294 
295 	value* create(value_kind k, sel_chan regid, unsigned ver);
296 
297 	value* operator[](unsigned id) {
298 		unsigned offset = id * aligned_elt_size;
299 		unsigned block_id;
300 		if (offset < block_size) {
301 			block_id = 0;
302 		} else {
303 			block_id = offset / block_size;
304 			offset = offset % block_size;
305 		}
306 		return (value*)((char*)blocks[block_id] + offset);
307 	}
308 
size()309 	unsigned size() { return total_size / aligned_elt_size; }
310 
311 protected:
312 	void delete_all();
313 };
314 
315 
316 
317 
318 
319 class sb_value_set {
320 
321 	sb_bitset bs;
322 
323 public:
sb_value_set()324 	sb_value_set() : bs() {}
325 
326 	class iterator {
327 		sb_value_pool &vp;
328 		sb_value_set *s;
329 		unsigned nb;
330 	public:
331 		iterator(shader &sh, sb_value_set *s, unsigned nb = 0);
332 
333 
334 		iterator& operator++() {
335 			if (nb + 1 < s->bs.size())
336 				nb = s->bs.find_bit(nb + 1);
337 			else
338 				nb = s->bs.size();
339 			return *this;
340 		}
341 		bool operator !=(const iterator &i) {
342 			return s != i.s || nb != i.nb;
343 		}
344 		bool operator ==(const iterator &i) { return !(*this != i); }
345 		value* operator *() {
346 			 return vp[nb];
347 		}
348 
349 
350 	};
351 
begin(shader & sh)352 	iterator begin(shader &sh) {
353 		return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0);
354 	}
end(shader & sh)355 	iterator end(shader &sh) { return iterator(sh, this, bs.size()); }
356 
357 	bool add_set_checked(sb_value_set & s2);
358 
add_set(sb_value_set & s2)359 	void add_set(sb_value_set & s2)  {
360 		if (bs.size() < s2.bs.size())
361 			bs.resize(s2.bs.size());
362 		bs |= s2.bs;
363 	}
364 
365 	void remove_set(sb_value_set & s2);
366 
367 	bool add_vec(vvec &vv);
368 
369 	bool add_val(value *v);
370 	bool contains(value *v);
371 
372 	bool remove_val(value *v);
373 
374 	bool remove_vec(vvec &vv);
375 
376 	void clear();
377 
378 	bool empty();
379 };
380 
381 typedef sb_value_set val_set;
382 
383 struct gpr_array {
384 	sel_chan base_gpr; // original gpr
385 	sel_chan gpr; // assigned by regalloc
386 	unsigned array_size;
387 
gpr_arraygpr_array388 	gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
389 			array_size(array_size) {}
390 
hashgpr_array391 	unsigned hash() { return (base_gpr << 10) * array_size; }
392 
393 	val_set interferences;
394 	vvec refs;
395 
396 	bool is_dead();
397 
398 };
399 
400 typedef std::vector<gpr_array*> regarray_vec;
401 
402 enum value_flags {
403 	VLF_UNDEF = (1 << 0),
404 	VLF_READONLY = (1 << 1),
405 	VLF_DEAD = (1 << 2),
406 
407 	VLF_PIN_REG = (1 << 3),
408 	VLF_PIN_CHAN = (1 << 4),
409 
410 	// opposite to alu clause local value - goes through alu clause boundary
411 	// (can't use temp gpr, can't recolor in the alu scheduler, etc)
412 	VLF_GLOBAL = (1 << 5),
413 	VLF_FIXED = (1 << 6),
414 	VLF_PVPS = (1 << 7),
415 
416 	VLF_PREALLOC = (1 << 8)
417 };
418 
419 inline value_flags operator |(value_flags l, value_flags r) {
420 	return (value_flags)((unsigned)l|(unsigned)r);
421 }
422 inline value_flags operator &(value_flags l, value_flags r) {
423 	return (value_flags)((unsigned)l&(unsigned)r);
424 }
425 inline value_flags operator ~(value_flags l) {
426 	return (value_flags)(~(unsigned)l);
427 }
428 inline value_flags& operator |=(value_flags &l, value_flags r) {
429 	l = l | r;
430 	return l;
431 }
432 inline value_flags& operator &=(value_flags &l, value_flags r) {
433 	l = l & r;
434 	return l;
435 }
436 
437 sb_ostream& operator << (sb_ostream &o, value &v);
438 
439 typedef uint32_t value_hash;
440 
441 typedef std::list< node * > uselist;
442 
443 enum constraint_kind {
444 	CK_SAME_REG,
445 	CK_PACKED_BS,
446 	CK_PHI
447 };
448 
449 class shader;
450 class sb_value_pool;
451 struct ra_chunk;
452 class ra_constraint;
453 
454 class value {
455 protected:
456 	value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
kind(k)457 		: kind(k), flags(),
458 			rel(), array(),
459 			version(ver), select(select), pin_gpr(select), gpr(),
460 			gvn_source(), ghash(),
461 			def(), adef(), uses(), constraint(), chunk(),
462 			literal_value(), uid(sh_id) {}
463 
~value()464 	~value() { delete_uses(); }
465 
466 	friend class sb_value_pool;
467 public:
468 	value_kind kind;
469 	value_flags flags;
470 
471 	vvec mdef;
472 	vvec muse;
473 	value *rel;
474 	gpr_array *array;
475 
476 	unsigned version;
477 
478 	sel_chan select;
479 	sel_chan pin_gpr;
480 	sel_chan gpr;
481 
482 	value *gvn_source;
483 	value_hash ghash;
484 
485 	node *def, *adef;
486 	uselist uses;
487 
488 	ra_constraint *constraint;
489 	ra_chunk *chunk;
490 
491 	literal literal_value;
492 
is_const()493 	bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; }
494 
is_AR()495 	bool is_AR() {
496 		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
497 	}
is_geometry_emit()498 	bool is_geometry_emit() {
499 		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
500 	}
is_lds_access()501 	bool is_lds_access() {
502 		return is_special_reg() && select == sel_chan(SV_LDS_RW, 0);
503 	}
is_lds_oq()504 	bool is_lds_oq() {
505 		return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0));
506 	}
507 
any_def()508 	node* any_def() {
509 		assert(!(def && adef));
510 		return def ? def : adef;
511 	}
512 
gvalue()513 	value* gvalue() {
514 		value *v = this;
515 		while (v->gvn_source && v != v->gvn_source)
516 			// FIXME we really shouldn't have such chains
517 			v = v->gvn_source;
518 		return v;
519 	}
520 
is_float_0_or_1()521 	bool is_float_0_or_1() {
522 		value *v = gvalue();
523 		return v->is_const() && (v->literal_value == literal(0)
524 						|| v->literal_value == literal(1.0f));
525 	}
526 
is_undef()527 	bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
528 
is_any_gpr()529 	bool is_any_gpr() {
530 		return (kind == VLK_REG || kind == VLK_TEMP);
531 	}
532 
is_agpr()533 	bool is_agpr() {
534 		return array && is_any_gpr();
535 	}
536 
537 	// scalar gpr, as opposed to element of gpr array
is_sgpr()538 	bool is_sgpr() {
539 		return !array && is_any_gpr();
540 	}
541 
is_special_reg()542 	bool is_special_reg() {	return kind == VLK_SPECIAL_REG;	}
is_any_reg()543 	bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
is_kcache()544 	bool is_kcache() { return kind == VLK_KCACHE; }
is_rel()545 	bool is_rel() {	return kind == VLK_REL_REG; }
is_readonly()546 	bool is_readonly() { return flags & VLF_READONLY; }
547 
is_chan_pinned()548 	bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
is_reg_pinned()549 	bool is_reg_pinned() { return flags & VLF_PIN_REG; }
550 
551 	bool is_global();
552 	void set_global();
553 	void set_prealloc();
554 
555 	bool is_prealloc();
556 
557 	bool is_fixed();
558 	void fix();
559 
is_dead()560 	bool is_dead() { return flags & VLF_DEAD; }
561 
get_const_value()562 	literal & get_const_value() {
563 		value *v = gvalue();
564 		assert(v->is_const());
565 		return v->literal_value;
566 	}
567 
568 	// true if needs to be encoded as literal in alu
is_literal()569 	bool is_literal() {
570 		return is_const()
571 				&& literal_value != literal(0)
572 				&& literal_value != literal(1)
573 				&& literal_value != literal(-1)
574 				&& literal_value != literal(0.5)
575 				&& literal_value != literal(1.0);
576 	}
577 
578 	void add_use(node *n);
579 	void remove_use(const node *n);
580 
581 	value_hash hash();
582 	value_hash rel_hash();
583 
assign_source(value * v)584 	void assign_source(value *v) {
585 		assert(!gvn_source || gvn_source == this);
586 		gvn_source = v->gvalue();
587 	}
588 
v_equal(value * v)589 	bool v_equal(value *v) { return gvalue() == v->gvalue(); }
590 
591 	unsigned use_count();
592 	void delete_uses();
593 
get_final_gpr()594 	sel_chan get_final_gpr() {
595 		if (array && array->gpr) {
596 			int reg_offset = select.sel() - array->base_gpr.sel();
597 			if (rel && rel->is_const())
598 				reg_offset += rel->get_const_value().i;
599 			return array->gpr + (reg_offset << 2);
600 		} else {
601 			return gpr;
602 		}
603 	}
604 
get_final_chan()605 	unsigned get_final_chan() {
606 		if (array) {
607 			assert(array->gpr);
608 			return array->gpr.chan();
609 		} else {
610 			assert(gpr);
611 			return gpr.chan();
612 		}
613 	}
614 
615 	val_set interferences;
616 	unsigned uid;
617 };
618 
619 class expr_handler;
620 
621 class value_table {
622 	typedef std::vector<value*> vt_item;
623 	typedef std::vector<vt_item> vt_table;
624 
625 	expr_handler &ex;
626 
627 	unsigned size_bits;
628 	unsigned size;
629 	unsigned size_mask;
630 
631 	vt_table hashtable;
632 
633 	unsigned cnt;
634 
635 public:
636 
637 	value_table(expr_handler &ex, unsigned size_bits = 10)
ex(ex)638 		: ex(ex), size_bits(size_bits), size(1u << size_bits),
639 		  size_mask(size - 1), hashtable(size), cnt() {}
640 
~value_table()641 	~value_table() {}
642 
643 	void add_value(value* v);
644 
645 	bool expr_equal(value* l, value* r);
646 
count()647 	unsigned count() { return cnt; }
648 
649 	void get_values(vvec & v);
650 };
651 
652 class sb_context;
653 
654 enum node_type {
655 	NT_UNKNOWN,
656 	NT_LIST,
657 	NT_OP,
658 	NT_REGION,
659 	NT_REPEAT,
660 	NT_DEPART,
661 	NT_IF,
662 };
663 
664 enum node_subtype {
665 	NST_UNKNOWN,
666 	NST_LIST,
667 	NST_ALU_GROUP,
668 	NST_ALU_CLAUSE,
669 	NST_ALU_INST,
670 	NST_ALU_PACKED_INST,
671 	NST_CF_INST,
672 	NST_FETCH_INST,
673 	NST_TEX_CLAUSE,
674 	NST_VTX_CLAUSE,
675 	NST_GDS_CLAUSE,
676 
677 	NST_BB,
678 
679 	NST_PHI,
680 	NST_PSI,
681 	NST_COPY,
682 
683 	NST_LOOP_PHI_CONTAINER,
684 	NST_LOOP_CONTINUE,
685 	NST_LOOP_BREAK
686 };
687 
688 enum node_flags {
689 	NF_EMPTY = 0,
690 	NF_DEAD = (1 << 0),
691 	NF_REG_CONSTRAINT = (1 << 1),
692 	NF_CHAN_CONSTRAINT = (1 << 2),
693 	NF_ALU_4SLOT = (1 << 3),
694 	NF_CONTAINER = (1 << 4),
695 
696 	NF_COPY_MOV = (1 << 5),
697 
698 	NF_DONT_KILL = (1 << 6),
699 	NF_DONT_HOIST = (1 << 7),
700 	NF_DONT_MOVE = (1 << 8),
701 
702 	// for KILLxx - we want to schedule them as early as possible
703 	NF_SCHEDULE_EARLY = (1 << 9),
704 
705 	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
706 	NF_ALU_STACK_WORKAROUND = (1 << 10)
707 };
708 
709 inline node_flags operator |(node_flags l, node_flags r) {
710 	return (node_flags)((unsigned)l|(unsigned)r);
711 }
712 inline node_flags& operator |=(node_flags &l, node_flags r) {
713 	l = l | r;
714 	return l;
715 }
716 
717 inline node_flags& operator &=(node_flags &l, node_flags r) {
718 	l = (node_flags)((unsigned)l & (unsigned)r);
719 	return l;
720 }
721 
722 inline node_flags operator ~(node_flags r) {
723 	return (node_flags)~(unsigned)r;
724 }
725 
726 struct node_stats {
727 	unsigned alu_count;
728 	unsigned alu_kill_count;
729 	unsigned alu_copy_mov_count;
730 	unsigned cf_count;
731 	unsigned fetch_count;
732 	unsigned region_count;
733 	unsigned loop_count;
734 	unsigned phi_count;
735 	unsigned loop_phi_count;
736 	unsigned depart_count;
737 	unsigned repeat_count;
738 	unsigned if_count;
739        bool uses_ar;
740 
node_statsnode_stats741 	node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
742 			cf_count(), fetch_count(), region_count(),
743 			loop_count(), phi_count(), loop_phi_count(), depart_count(),
744                        repeat_count(), if_count(), uses_ar(false) {}
745 
746 	void dump();
747 };
748 
749 class shader;
750 
751 class vpass;
752 
753 class container_node;
754 class region_node;
755 
756 class node {
757 
758 protected:
759 	node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
prev()760 	: prev(), next(), parent(),
761 	  type(nt), subtype(nst), flags(flags),
762 	  pred(), dst(), src() {}
763 
~node()764 	virtual ~node() {};
765 
766 public:
767 	node *prev, *next;
768 	container_node *parent;
769 
770 	node_type type;
771 	node_subtype subtype;
772 	node_flags flags;
773 
774 	value *pred;
775 
776 	vvec dst;
777 	vvec src;
778 
is_valid()779 	virtual bool is_valid() { return true; }
780 	virtual bool accept(vpass &p, bool enter);
781 
782 	void insert_before(node *n);
783 	void insert_after(node *n);
784 	void replace_with(node *n);
785 	void remove();
786 
787 	virtual value_hash hash() const;
788 	value_hash hash_src() const;
789 
790 	virtual bool fold_dispatch(expr_handler *ex);
791 
is_container()792 	bool is_container() { return flags & NF_CONTAINER; }
793 
is_alu_packed()794 	bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; }
is_alu_inst()795 	bool is_alu_inst() { return subtype == NST_ALU_INST; }
is_alu_group()796 	bool is_alu_group() { return subtype == NST_ALU_GROUP; }
is_alu_clause()797 	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
798 
is_fetch_clause()799 	bool is_fetch_clause() {
800 		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
801 	}
802 
is_copy()803 	bool is_copy() { return subtype == NST_COPY; }
is_copy_mov()804 	bool is_copy_mov() { return flags & NF_COPY_MOV; }
is_any_alu()805 	bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); }
806 
is_fetch_inst()807 	bool is_fetch_inst() { return subtype == NST_FETCH_INST; }
is_cf_inst()808 	bool is_cf_inst() { return subtype == NST_CF_INST; }
809 
is_region()810 	bool is_region() { return type == NT_REGION; }
is_depart()811 	bool is_depart() { return type == NT_DEPART; }
is_repeat()812 	bool is_repeat() { return type == NT_REPEAT; }
is_if()813 	bool is_if() { return type == NT_IF; }
is_bb()814 	bool is_bb() { return subtype == NST_BB; }
815 
is_phi()816 	bool is_phi() { return subtype == NST_PHI; }
817 
is_dead()818 	bool is_dead() { return flags & NF_DEAD; }
819 
820 	bool is_cf_op(unsigned op);
821 	bool is_alu_op(unsigned op);
822 	bool is_fetch_op(unsigned op);
823 
824 	unsigned cf_op_flags();
825 	unsigned alu_op_flags();
826 	unsigned alu_op_slot_flags();
827 	unsigned fetch_op_flags();
828 
829 	bool is_mova();
830 	bool is_pred_set();
831 
vec_uses_ar(vvec & vv)832 	bool vec_uses_ar(vvec &vv) {
833 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
834 			value *v = *I;
835 			if (v && v->rel && !v->rel->is_const())
836 				return true;
837 		}
838 		return false;
839 	}
840 
uses_ar()841 	bool uses_ar() {
842 		return vec_uses_ar(dst) || vec_uses_ar(src);
843 	}
844 
vec_uses_lds_oq(vvec & vv)845 	bool vec_uses_lds_oq(vvec &vv) {
846 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
847 			value *v = *I;
848 			if (v && v->is_lds_oq())
849 				return true;
850 		}
851 		return false;
852 	}
853 
consumes_lds_oq()854 	bool consumes_lds_oq() {
855 		return vec_uses_lds_oq(src);
856 	}
857 
produces_lds_oq()858 	bool produces_lds_oq() {
859 		return vec_uses_lds_oq(dst);
860 	}
861 
862 	region_node* get_parent_region();
863 
864 	friend class shader;
865 };
866 
867 class container_node : public node {
868 public:
869 
870 	container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST,
871 	               node_flags flags = NF_EMPTY)
872 	: node(nt, nst, flags | NF_CONTAINER), first(), last(),
873 	  live_after(), live_before() {}
874 
875 	// child items list
876 	node *first, *last;
877 
878 	val_set live_after;
879 	val_set live_before;
880 
881 	class iterator {
882 		node *p;
883 	public:
p(pp)884 		iterator(node *pp = NULL) : p(pp) {}
885 		iterator & operator ++() { p = p->next; return *this;}
886 		iterator & operator --() { p = p->prev; return *this;}
887 		node* operator *() { return p; }
888 		node* operator ->() { return p; }
advance(int n)889 		const iterator advance(int n) {
890 			if (!n) return *this;
891 			iterator I(p);
892 			if (n > 0) while (n--) ++I;
893 			else while (n++) --I;
894 			return I;
895 		}
896 		const iterator operator +(int n) { return advance(n); }
897 		const iterator operator -(int n) { return advance(-n); }
898 		bool operator !=(const iterator &i) { return p != i.p; }
899 		bool operator ==(const iterator &i) { return p == i.p; }
900 	};
901 
902 	class riterator {
903 		iterator i;
904 	public:
i(p)905 		riterator(node *p = NULL) : i(p) {}
906 		riterator & operator ++() { --i; return *this;}
907 		riterator & operator --() { ++i; return *this;}
908 		node* operator *() { return *i; }
909 		node* operator ->() { return *i; }
910 		bool operator !=(const riterator &r) { return i != r.i; }
911 		bool operator ==(const riterator &r) { return i == r.i; }
912 	};
913 
begin()914 	iterator begin() { return first; }
end()915 	iterator end() { return NULL; }
rbegin()916 	riterator rbegin() { return last; }
rend()917 	riterator rend() { return NULL; }
918 
empty()919 	bool empty() { assert(first != NULL || first == last); return !first; }
920 	unsigned count();
921 
922 	// used with node containers that represent shceduling queues
923 	// ignores copies and takes into account alu_packed_node items
924 	unsigned real_alu_count();
925 
926 	void push_back(node *n);
927 	void push_front(node *n);
928 
929 	void insert_node_before(node *s, node *n);
930 	void insert_node_after(node *s, node *n);
931 
932 	void append_from(container_node *c);
933 
934 	// remove range [b..e) from some container and assign to this container
935 	void move(iterator b, iterator e);
936 
937 	void expand();
938 	void expand(container_node *n);
939 	void remove_node(node *n);
940 
941 	node *cut(iterator b, iterator e);
942 
clear()943 	void clear() { first = last = NULL; }
944 
is_valid()945 	virtual bool is_valid() { return true; }
946 	virtual bool accept(vpass &p, bool enter);
947 	virtual bool fold_dispatch(expr_handler *ex);
948 
front()949 	node* front() { return first; }
back()950 	node* back() { return last; }
951 
952 	void collect_stats(node_stats &s);
953 
954 	friend class shader;
955 
956 
957 };
958 
959 typedef container_node::iterator node_iterator;
960 typedef container_node::riterator node_riterator;
961 
962 class alu_group_node : public container_node {
963 protected:
alu_group_node()964 	alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {}
965 public:
966 
967 	std::vector<literal> literals;
968 
is_valid()969 	virtual bool is_valid() { return subtype == NST_ALU_GROUP; }
970 	virtual bool accept(vpass &p, bool enter);
971 
972 
literal_chan(literal l)973 	unsigned literal_chan(literal l) {
974 		std::vector<literal>::iterator F =
975 				std::find(literals.begin(), literals.end(), l);
976 		assert(F != literals.end());
977 		return F - literals.begin();
978 	}
979 
980 	friend class shader;
981 };
982 
983 class cf_node : public container_node {
984 protected:
cf_node()985 	cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
986 		jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); };
987 public:
988 	bc_cf bc;
989 
990 	cf_node *jump_target;
991 	bool jump_after_target;
992 
is_valid()993 	virtual bool is_valid() { return subtype == NST_CF_INST; }
994 	virtual bool accept(vpass &p, bool enter);
995 	virtual bool fold_dispatch(expr_handler *ex);
996 
jump(cf_node * c)997 	void jump(cf_node *c) { jump_target = c; jump_after_target = false; }
jump_after(cf_node * c)998 	void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; }
999 
1000 	friend class shader;
1001 };
1002 
1003 class alu_node : public node {
1004 protected:
alu_node()1005 	alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); };
1006 public:
1007 	bc_alu bc;
1008 
is_valid()1009 	virtual bool is_valid() { return subtype == NST_ALU_INST; }
1010 	virtual bool accept(vpass &p, bool enter);
1011 	virtual bool fold_dispatch(expr_handler *ex);
1012 
forced_bank_swizzle()1013 	unsigned forced_bank_swizzle() {
1014 		return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ?
1015 				VEC_210 : 0;
1016 	}
1017 
1018 	// return param index + 1 if instruction references interpolation param,
1019 	// otherwise 0
1020 	unsigned interp_param();
1021 
1022 	alu_group_node *get_alu_group_node();
1023 
1024 	friend class shader;
1025 };
1026 
1027 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later)
1028 class alu_packed_node : public container_node {
1029 protected:
alu_packed_node()1030 	alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {}
1031 public:
1032 
op_ptr()1033 	const alu_op_info* op_ptr() {
1034 		return static_cast<alu_node*>(first)->bc.op_ptr;
1035 	}
op()1036 	unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
1037 	void init_args(bool repl);
1038 
is_valid()1039 	virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
1040 	virtual bool accept(vpass &p, bool enter);
1041 	virtual bool fold_dispatch(expr_handler *ex);
1042 
1043 	unsigned get_slot_mask();
1044 	void update_packed_items(sb_context &ctx);
1045 
1046 	friend class shader;
1047 };
1048 
1049 class fetch_node : public node {
1050 protected:
fetch_node()1051 	fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); };
1052 public:
1053 	bc_fetch bc;
1054 
is_valid()1055 	virtual bool is_valid() { return subtype == NST_FETCH_INST; }
1056 	virtual bool accept(vpass &p, bool enter);
1057 	virtual bool fold_dispatch(expr_handler *ex);
1058 
uses_grad()1059 	bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; }
1060 
1061 	friend class shader;
1062 };
1063 
1064 class region_node;
1065 
1066 class repeat_node : public container_node {
1067 protected:
repeat_node(region_node * target,unsigned id)1068 	repeat_node(region_node *target, unsigned id)
1069 	: container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {}
1070 public:
1071 	region_node *target;
1072 	unsigned rep_id;
1073 
1074 	virtual bool accept(vpass &p, bool enter);
1075 
1076 	friend class shader;
1077 };
1078 
1079 class depart_node : public container_node {
1080 protected:
depart_node(region_node * target,unsigned id)1081 	depart_node(region_node *target, unsigned id)
1082 	: container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {}
1083 public:
1084 	region_node *target;
1085 	unsigned dep_id;
1086 
1087 	virtual bool accept(vpass &p, bool enter);
1088 
1089 	friend class shader;
1090 };
1091 
1092 class if_node : public container_node {
1093 protected:
if_node()1094 	if_node() : container_node(NT_IF, NST_LIST), cond() {};
1095 public:
1096 	value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx
1097 
1098 	virtual bool accept(vpass &p, bool enter);
1099 
1100 	friend class shader;
1101 };
1102 
1103 typedef std::vector<depart_node*> depart_vec;
1104 typedef std::vector<repeat_node*> repeat_vec;
1105 
1106 class region_node : public container_node {
1107 protected:
region_node(unsigned id)1108 	region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
1109 			loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop()
1110 			{}
1111 public:
1112 	unsigned region_id;
1113 
1114 	container_node *loop_phi;
1115 	container_node *phi;
1116 
1117 	val_set vars_defined;
1118 
1119 	depart_vec departs;
1120 	repeat_vec repeats;
1121 
1122 	// true if region was created for loop in the parser, sometimes repeat_node
1123 	// may be optimized away so we need to remember this information
1124 	bool src_loop;
1125 
1126 	virtual bool accept(vpass &p, bool enter);
1127 
dep_count()1128 	unsigned dep_count() { return departs.size(); }
rep_count()1129 	unsigned rep_count() { return repeats.size() + 1; }
1130 
is_loop()1131 	bool is_loop() { return src_loop || !repeats.empty(); }
1132 
get_entry_code_location()1133 	container_node* get_entry_code_location() {
1134 		node *p = first;
1135 		while (p && (p->is_depart() || p->is_repeat()))
1136 			p = static_cast<container_node*>(p)->first;
1137 
1138 		container_node *c = static_cast<container_node*>(p);
1139 		if (c->is_bb())
1140 			return c;
1141 		else
1142 			return c->parent;
1143 	}
1144 
1145 	void expand_depart(depart_node *d);
1146 	void expand_repeat(repeat_node *r);
1147 
1148 	friend class shader;
1149 };
1150 
1151 class bb_node : public container_node {
1152 protected:
bb_node(unsigned id,unsigned loop_level)1153 	bb_node(unsigned id, unsigned loop_level)
1154 		: container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {}
1155 public:
1156 	unsigned id;
1157 	unsigned loop_level;
1158 
1159 	virtual bool accept(vpass &p, bool enter);
1160 
1161 	friend class shader;
1162 };
1163 
1164 
1165 typedef std::vector<region_node*> regions_vec;
1166 typedef std::vector<bb_node*> bbs_vec;
1167 typedef std::list<node*> sched_queue;
1168 typedef sched_queue::iterator sq_iterator;
1169 typedef std::vector<node*> node_vec;
1170 typedef std::list<node*> node_list;
1171 typedef std::set<node*> node_set;
1172 
1173 
1174 
1175 } // namespace r600_sb
1176 
1177 #endif /* R600_SB_IR_H_ */
1178