• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
26  
27  #ifndef R600_SB_IR_H_
28  #define R600_SB_IR_H_
29  
30  #include <algorithm>
31  #include <stdint.h>
32  #include <vector>
33  #include <set>
34  #include <algorithm>
35  
36  #include "sb_bc.h"
37  
38  namespace r600_sb {
39  
// Selector numbers of the special (non-GPR) registers; numbering starts at
// 128 and continues sequentially.
enum special_regs {
	SV_ALU_PRED = 128,
	SV_EXEC_MASK,
	SV_AR_INDEX,
	SV_VALID_MASK,
	SV_GEOMETRY_EMIT
};

// Forward declarations of types that are only used through pointers or
// references before their definitions.
class node;
class value;
class shader;
51  
52  struct sel_chan
53  {
54  	unsigned id;
55  
idsel_chan56  	sel_chan(unsigned id = 0) : id(id) {}
sel_chansel_chan57  	sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
58  
selsel_chan59  	unsigned sel() const { return sel(id); }
chansel_chan60  	unsigned chan() const {return chan(id); }
61  	operator unsigned() const {return id;}
62  
selsel_chan63  	static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
chansel_chan64  	static unsigned chan(unsigned idx) { return (idx-1) & 3; }
65  
sel_chansel_chan66  	sel_chan(unsigned bank, unsigned index,
67  			 unsigned chan, alu_kcache_index_mode index_mode)
68  		: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
kcache_index_modesel_chan69  	unsigned kcache_index_mode() const { return sel() >> 28; }
kcache_selsel_chan70  	unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
kcache_banksel_chan71  	unsigned kcache_bank() const { return kcache_sel() >> 12; }
72  };
73  
74  inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
75  	static const char * ch = "xyzw";
76  	o << r.sel() << "." << ch[r.chan()];
77  	return o;
78  }
79  
80  typedef std::vector<value*>  vvec;
81  
// Base class of the block-based allocators. It keeps the list of allocated
// blocks and releases them via free_all() on destruction; allocate() and
// free_all() are defined out of line.
//
// NOTE(review): the implicitly generated copy operations would duplicate the
// raw block pointers while the destructor calls free_all(), so copying a pool
// looks unsafe - confirm against the free_all() implementation.
class sb_pool {
protected:
	static const unsigned SB_POOL_ALIGN = 8;
	static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16);

	typedef std::vector<void*> block_vector;

	unsigned block_size;   // size of one block, as passed to the constructor
	block_vector blocks;   // blocks allocated so far
	unsigned total_size;   // starts at 0; maintained by allocate()/free_all()

public:
	sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE)
		: block_size(block_size), blocks(), total_size() {}

	virtual ~sb_pool() { free_all(); }

	// Returns storage for sz bytes (defined out of line).
	void* allocate(unsigned sz);

protected:
	void free_all();
};
104  
// Set implemented as a sorted std::vector without duplicates. Two elements
// are considered equal when neither compares less than the other under Comp.
// Lookups are binary searches; insert and erase shift the vector's tail.
template <typename V, typename Comp = std::less<V> >
class sb_set {
	typedef std::vector<V> data_vector;
	data_vector vec;
public:

	typedef typename data_vector::iterator iterator;
	typedef typename data_vector::const_iterator const_iterator;

	sb_set() : vec() {}
	~sb_set() {  }

	iterator begin() { return vec.begin(); }
	iterator end() { return vec.end(); }
	const_iterator begin() const { return vec.begin(); }
	const_iterator end() const { return vec.end(); }

	// Replaces the contents with the union of this set and s.
	void add_set(const sb_set& s) {
		data_vector merged;
		merged.reserve(vec.size() + s.vec.size());
		std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(),
		          std::inserter(merged, merged.begin()), Comp());
		vec.swap(merged);
	}

	// First position whose element is not less than v.
	iterator lower_bound(const V& v) {
		return std::lower_bound(vec.begin(), vec.end(), v, Comp());
	}

	// Inserts v unless an equal element exists. Returns the position of the
	// (new or existing) element and whether an insertion took place.
	std::pair<iterator, bool> insert(const V& v) {
		iterator pos = lower_bound(v);
		if (pos != vec.end() && is_equal(*pos, v))
			return std::make_pair(pos, false);
		return std::make_pair(vec.insert(pos, v), true);
	}

	// Removes the element equal to v; returns the number removed (0 or 1).
	unsigned erase(const V&  v) {
		iterator pos = lower_bound(v);
		if (pos == vec.end() || !is_equal(*pos, v))
			return 0;
		vec.erase(pos);
		return 1;
	}

	void clear() { vec.clear(); }

	bool empty() const { return vec.empty(); }

	// Equivalence under Comp: neither argument orders before the other.
	bool is_equal(const V& v1, const V& v2) const {
		return !Comp()(v1, v2) && !Comp()(v2, v1);
	}

	// Position of the element equal to v, or end() if there is none.
	iterator find(const V& v) {
		iterator pos = lower_bound(v);
		return (pos != vec.end() && is_equal(*pos, v)) ? pos : vec.end();
	}

	unsigned size() const { return vec.size(); }
	void erase(iterator I) { vec.erase(I); }
};
165  
166  template <typename K, typename V, typename KComp = std::less<K> >
167  class sb_map {
168  	typedef std::pair<K, V> datatype;
169  
170  	struct Comp {
operatorComp171  		bool operator()(const datatype &v1, const datatype &v2) {
172  			return KComp()(v1.first, v2.first);
173  		}
174  	};
175  
176  	typedef sb_set<datatype, Comp> dataset;
177  
178  	dataset set;
179  
180  public:
181  
sb_map()182  	sb_map() : set() {}
183  
184  	typedef typename dataset::iterator iterator;
185  
begin()186  	iterator begin() { return set.begin(); }
end()187  	iterator end() { return set.end(); }
188  
clear()189  	void clear() { set.clear(); }
190  
191  	V& operator[](const K& key) {
192  		datatype P = std::make_pair(key, V());
193  		iterator F = set.find(P);
194  		if (F == set.end()) {
195  			return (*(set.insert(P).first)).second;
196  		} else {
197  			return (*F).second;
198  		}
199  	}
200  
insert(const datatype & d)201  	std::pair<iterator, bool> insert(const datatype& d) {
202  		return set.insert(d);
203  	}
204  
find(const K & key)205  	iterator find(const K& key) {
206  		return set.find(std::make_pair(key, V()));
207  	}
208  
erase(const K & key)209  	unsigned erase(const K& key) {
210  		return set.erase(std::make_pair(key, V()));
211  	}
212  
erase(iterator I)213  	void erase(iterator I) {
214  		set.erase(I);
215  	}
216  };
217  
// Resizable bit set stored as a vector of 32-bit words. Most operations are
// defined out of line.
class sb_bitset {
	typedef uint32_t basetype;
	static const unsigned bt_bits = sizeof(basetype) << 3;  // bits per word
	std::vector<basetype> data;
	unsigned bit_size;  // size in bits, as reported by size()

public:

	sb_bitset() : data(), bit_size() {}

	bool get(unsigned id);
	void set(unsigned id, bool bit = true);
	bool set_chk(unsigned id, bool bit = true);

	void clear();
	void resize(unsigned size);

	unsigned size() const { return bit_size; }

	unsigned find_bit(unsigned start = 0);

	void swap(sb_bitset & bs2);

	bool operator==(const sb_bitset &bs2);
	bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); }

	// ORs bs2 into this set, growing this set first if bs2 holds more bits.
	sb_bitset& operator|=(const sb_bitset &bs2) {
		if (bit_size < bs2.bit_size) {
			resize(bs2.bit_size);
		}

		// Only the words both sets have can contribute.
		for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
				++i) {
			data[i] |= bs2.data[i];
		}
		return *this;
	}

	sb_bitset& operator&=(const sb_bitset &bs2);
	sb_bitset& mask(const sb_bitset &bs2);

	// Union of two sets as a new set.
	friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) {
		sb_bitset nbs(b1);
		nbs |= b2;
		return nbs;
	}
};
265  
// Kind of a value: register-like kinds first, then constant-like kinds, then
// the undefined kind.
enum value_kind {
	VLK_REG,
	VLK_REL_REG,
	VLK_SPECIAL_REG,
	VLK_TEMP,

	VLK_CONST,
	VLK_KCACHE,
	VLK_PARAM,
	VLK_SPECIAL_CONST,

	VLK_UNDEF
};
279  
280  
281  
282  class sb_value_pool : protected sb_pool {
283  	unsigned aligned_elt_size;
284  
285  public:
286  	sb_value_pool(unsigned elt_size, unsigned block_elts = 256)
287  		: sb_pool(block_elts * (aligned_elt_size = ((elt_size +
288  				SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {}
289  
~sb_value_pool()290  	virtual ~sb_value_pool() { delete_all(); }
291  
292  	value* create(value_kind k, sel_chan regid, unsigned ver);
293  
294  	value* operator[](unsigned id) {
295  		unsigned offset = id * aligned_elt_size;
296  		unsigned block_id;
297  		if (offset < block_size) {
298  			block_id = 0;
299  		} else {
300  			block_id = offset / block_size;
301  			offset = offset % block_size;
302  		}
303  		return (value*)((char*)blocks[block_id] + offset);
304  	}
305  
size()306  	unsigned size() { return total_size / aligned_elt_size; }
307  
308  protected:
309  	void delete_all();
310  };
311  
312  
313  
314  
315  
316  class sb_value_set {
317  
318  	sb_bitset bs;
319  
320  public:
sb_value_set()321  	sb_value_set() : bs() {}
322  
323  	class iterator {
324  		sb_value_pool &vp;
325  		sb_value_set *s;
326  		unsigned nb;
327  	public:
328  		iterator(shader &sh, sb_value_set *s, unsigned nb = 0);
329  
330  
331  		iterator& operator++() {
332  			if (nb + 1 < s->bs.size())
333  				nb = s->bs.find_bit(nb + 1);
334  			else
335  				nb = s->bs.size();
336  			return *this;
337  		}
338  		bool operator !=(const iterator &i) {
339  			return s != i.s || nb != i.nb;
340  		}
341  		bool operator ==(const iterator &i) { return !(*this != i); }
342  		value* operator *() {
343  			 return vp[nb];
344  		}
345  
346  
347  	};
348  
begin(shader & sh)349  	iterator begin(shader &sh) {
350  		return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0);
351  	}
end(shader & sh)352  	iterator end(shader &sh) { return iterator(sh, this, bs.size()); }
353  
354  	bool add_set_checked(sb_value_set & s2);
355  
add_set(sb_value_set & s2)356  	void add_set(sb_value_set & s2)  {
357  		if (bs.size() < s2.bs.size())
358  			bs.resize(s2.bs.size());
359  		bs |= s2.bs;
360  	}
361  
362  	void remove_set(sb_value_set & s2);
363  
364  	bool add_vec(vvec &vv);
365  
366  	bool add_val(value *v);
367  	bool contains(value *v);
368  
369  	bool remove_val(value *v);
370  
371  	bool remove_vec(vvec &vv);
372  
373  	void clear();
374  
375  	bool empty();
376  };
377  
378  typedef sb_value_set val_set;
379  
380  struct gpr_array {
381  	sel_chan base_gpr; // original gpr
382  	sel_chan gpr; // assigned by regalloc
383  	unsigned array_size;
384  
gpr_arraygpr_array385  	gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
386  			array_size(array_size) {}
387  
hashgpr_array388  	unsigned hash() { return (base_gpr << 10) * array_size; }
389  
390  	val_set interferences;
391  	vvec refs;
392  
393  	bool is_dead();
394  
395  };
396  
397  typedef std::vector<gpr_array*> regarray_vec;
398  
// Bit flags stored in value::flags.
enum value_flags {
	VLF_UNDEF = (1 << 0),
	VLF_READONLY = (1 << 1),
	VLF_DEAD = (1 << 2),

	VLF_PIN_REG = (1 << 3),
	VLF_PIN_CHAN = (1 << 4),

	// opposite to alu clause local value - goes through alu clause boundary
	// (can't use temp gpr, can't recolor in the alu scheduler, etc)
	VLF_GLOBAL = (1 << 5),
	VLF_FIXED = (1 << 6),
	VLF_PVPS = (1 << 7),

	VLF_PREALLOC = (1 << 8)
};

// Bitwise operators so that flag combinations keep the value_flags type.
inline value_flags operator |(value_flags l, value_flags r) {
	return (value_flags)((unsigned)l|(unsigned)r);
}
inline value_flags operator &(value_flags l, value_flags r) {
	return (value_flags)((unsigned)l&(unsigned)r);
}
inline value_flags operator ~(value_flags l) {
	// Complement within the declared flag bits only: converting a value that
	// lies outside the enumeration's range to value_flags is undefined
	// behaviour. The result is unchanged for the usual "flags &= ~X" use.
	// VLF_PREALLOC must remain the highest flag for this mask to be complete.
	const unsigned all_flags = ((unsigned)VLF_PREALLOC << 1) - 1;
	return (value_flags)(~(unsigned)l & all_flags);
}
inline value_flags& operator |=(value_flags &l, value_flags r) {
	l = l | r;
	return l;
}
inline value_flags& operator &=(value_flags &l, value_flags r) {
	l = l & r;
	return l;
}
433  
434  sb_ostream& operator << (sb_ostream &o, value &v);
435  
436  typedef uint32_t value_hash;
437  
// Kind of a use, recorded in use_info::kind.
enum use_kind {
	UK_SRC,
	UK_SRC_REL,
	UK_DST_REL,
	UK_MAYDEF,
	UK_MAYUSE,
	UK_PRED,
	UK_COND
};
447  
448  struct use_info {
449  	node *op;
450  	use_kind kind;
451  	int arg;
452  
use_infouse_info453  	use_info(node *n, use_kind kind, int arg)
454  		: op(n), kind(kind), arg(arg) {}
455  };
456  
457  typedef std::list< use_info * > uselist;
458  
// Kinds of register allocation constraints.
enum constraint_kind {
	CK_SAME_REG,
	CK_PACKED_BS,
	CK_PHI
};

// Forward declarations for types referenced by pointer in class value.
class shader;
class sb_value_pool;
struct ra_chunk;
class ra_constraint;
469  
470  class value {
471  protected:
472  	value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
kind(k)473  		: kind(k), flags(),
474  			rel(), array(),
475  			version(ver), select(select), pin_gpr(select), gpr(),
476  			gvn_source(), ghash(),
477  			def(), adef(), uses(), constraint(), chunk(),
478  			literal_value(), uid(sh_id) {}
479  
~value()480  	~value() { delete_uses(); }
481  
482  	friend class sb_value_pool;
483  public:
484  	value_kind kind;
485  	value_flags flags;
486  
487  	vvec mdef;
488  	vvec muse;
489  	value *rel;
490  	gpr_array *array;
491  
492  	unsigned version;
493  
494  	sel_chan select;
495  	sel_chan pin_gpr;
496  	sel_chan gpr;
497  
498  	value *gvn_source;
499  	value_hash ghash;
500  
501  	node *def, *adef;
502  	uselist uses;
503  
504  	ra_constraint *constraint;
505  	ra_chunk *chunk;
506  
507  	literal literal_value;
508  
is_const()509  	bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; }
510  
is_AR()511  	bool is_AR() {
512  		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
513  	}
is_geometry_emit()514  	bool is_geometry_emit() {
515  		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
516  	}
517  
any_def()518  	node* any_def() {
519  		assert(!(def && adef));
520  		return def ? def : adef;
521  	}
522  
gvalue()523  	value* gvalue() {
524  		value *v = this;
525  		while (v->gvn_source && v != v->gvn_source)
526  			// FIXME we really shouldn't have such chains
527  			v = v->gvn_source;
528  		return v;
529  	}
530  
is_float_0_or_1()531  	bool is_float_0_or_1() {
532  		value *v = gvalue();
533  		return v->is_const() && (v->literal_value == literal(0)
534  						|| v->literal_value == literal(1.0f));
535  	}
536  
is_undef()537  	bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
538  
is_any_gpr()539  	bool is_any_gpr() {
540  		return (kind == VLK_REG || kind == VLK_TEMP);
541  	}
542  
is_agpr()543  	bool is_agpr() {
544  		return array && is_any_gpr();
545  	}
546  
547  	// scalar gpr, as opposed to element of gpr array
is_sgpr()548  	bool is_sgpr() {
549  		return !array && is_any_gpr();
550  	}
551  
is_special_reg()552  	bool is_special_reg() {	return kind == VLK_SPECIAL_REG;	}
is_any_reg()553  	bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
is_kcache()554  	bool is_kcache() { return kind == VLK_KCACHE; }
is_rel()555  	bool is_rel() {	return kind == VLK_REL_REG; }
is_readonly()556  	bool is_readonly() { return flags & VLF_READONLY; }
557  
is_chan_pinned()558  	bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
is_reg_pinned()559  	bool is_reg_pinned() { return flags & VLF_PIN_REG; }
560  
561  	bool is_global();
562  	void set_global();
563  	void set_prealloc();
564  
565  	bool is_prealloc();
566  
567  	bool is_fixed();
568  	void fix();
569  
is_dead()570  	bool is_dead() { return flags & VLF_DEAD; }
571  
get_const_value()572  	literal & get_const_value() {
573  		value *v = gvalue();
574  		assert(v->is_const());
575  		return v->literal_value;
576  	}
577  
578  	// true if needs to be encoded as literal in alu
is_literal()579  	bool is_literal() {
580  		return is_const()
581  				&& literal_value != literal(0)
582  				&& literal_value != literal(1)
583  				&& literal_value != literal(-1)
584  				&& literal_value != literal(0.5)
585  				&& literal_value != literal(1.0);
586  	}
587  
588  	void add_use(node *n, use_kind kind, int arg);
589  	void remove_use(const node *n);
590  
591  	value_hash hash();
592  	value_hash rel_hash();
593  
assign_source(value * v)594  	void assign_source(value *v) {
595  		assert(!gvn_source || gvn_source == this);
596  		gvn_source = v->gvalue();
597  	}
598  
v_equal(value * v)599  	bool v_equal(value *v) { return gvalue() == v->gvalue(); }
600  
601  	unsigned use_count();
602  	void delete_uses();
603  
get_final_gpr()604  	sel_chan get_final_gpr() {
605  		if (array && array->gpr) {
606  			int reg_offset = select.sel() - array->base_gpr.sel();
607  			if (rel && rel->is_const())
608  				reg_offset += rel->get_const_value().i;
609  			return array->gpr + (reg_offset << 2);
610  		} else {
611  			return gpr;
612  		}
613  	}
614  
get_final_chan()615  	unsigned get_final_chan() {
616  		if (array) {
617  			assert(array->gpr);
618  			return array->gpr.chan();
619  		} else {
620  			assert(gpr);
621  			return gpr.chan();
622  		}
623  	}
624  
625  	val_set interferences;
626  	unsigned uid;
627  };
628  
629  class expr_handler;
630  
631  class value_table {
632  	typedef std::vector<value*> vt_item;
633  	typedef std::vector<vt_item> vt_table;
634  
635  	expr_handler &ex;
636  
637  	unsigned size_bits;
638  	unsigned size;
639  	unsigned size_mask;
640  
641  	vt_table hashtable;
642  
643  	unsigned cnt;
644  
645  public:
646  
647  	value_table(expr_handler &ex, unsigned size_bits = 10)
ex(ex)648  		: ex(ex), size_bits(size_bits), size(1u << size_bits),
649  		  size_mask(size - 1), hashtable(size), cnt() {}
650  
~value_table()651  	~value_table() {}
652  
653  	void add_value(value* v);
654  
655  	bool expr_equal(value* l, value* r);
656  
count()657  	unsigned count() { return cnt; }
658  
659  	void get_values(vvec & v);
660  };
661  
class sb_context;

// Coarse node category, stored in node::type.
enum node_type {
	NT_UNKNOWN,
	NT_LIST,
	NT_OP,
	NT_REGION,
	NT_REPEAT,
	NT_DEPART,
	NT_IF
};
673  
// Fine-grained node category, stored in node::subtype.
enum node_subtype {
	NST_UNKNOWN,
	NST_LIST,
	NST_ALU_GROUP,
	NST_ALU_CLAUSE,
	NST_ALU_INST,
	NST_ALU_PACKED_INST,
	NST_CF_INST,
	NST_FETCH_INST,
	NST_TEX_CLAUSE,
	NST_VTX_CLAUSE,

	NST_BB,

	NST_PHI,
	NST_PSI,
	NST_COPY,

	NST_LOOP_PHI_CONTAINER,
	NST_LOOP_CONTINUE,
	NST_LOOP_BREAK
};
696  
// Bit flags stored in node::flags.
enum node_flags {
	NF_EMPTY = 0,
	NF_DEAD = (1 << 0),
	NF_REG_CONSTRAINT = (1 << 1),
	NF_CHAN_CONSTRAINT = (1 << 2),
	NF_ALU_4SLOT = (1 << 3),
	NF_CONTAINER = (1 << 4),

	NF_COPY_MOV = (1 << 5),

	NF_DONT_KILL = (1 << 6),
	NF_DONT_HOIST = (1 << 7),
	NF_DONT_MOVE = (1 << 8),

	// for KILLxx - we want to schedule them as early as possible
	NF_SCHEDULE_EARLY = (1 << 9),

	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
	NF_ALU_STACK_WORKAROUND = (1 << 10)
};

// Bitwise operators so that flag combinations keep the node_flags type.
inline node_flags operator |(node_flags l, node_flags r) {
	return (node_flags)((unsigned)l|(unsigned)r);
}
inline node_flags& operator |=(node_flags &l, node_flags r) {
	l = l | r;
	return l;
}

inline node_flags& operator &=(node_flags &l, node_flags r) {
	l = (node_flags)((unsigned)l & (unsigned)r);
	return l;
}

inline node_flags operator ~(node_flags r) {
	// Complement within the declared flag bits only: converting a value that
	// lies outside the enumeration's range to node_flags is undefined
	// behaviour. The result is unchanged for the usual "flags &= ~X" use.
	// NF_ALU_STACK_WORKAROUND must remain the highest flag for this mask to
	// be complete.
	const unsigned all_flags = ((unsigned)NF_ALU_STACK_WORKAROUND << 1) - 1;
	return (node_flags)(~(unsigned)r & all_flags);
}
734  
// Per-category node counters (filled by container_node::collect_stats()).
// All counters start at zero.
struct node_stats {
	unsigned alu_count;
	unsigned alu_kill_count;
	unsigned alu_copy_mov_count;
	unsigned cf_count;
	unsigned fetch_count;
	unsigned region_count;
	unsigned loop_count;
	unsigned phi_count;
	unsigned loop_phi_count;
	unsigned depart_count;
	unsigned repeat_count;
	unsigned if_count;

	node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
			cf_count(), fetch_count(), region_count(),
			loop_count(), phi_count(), loop_phi_count(), depart_count(),
			repeat_count(), if_count() {}

	void dump();
};
756  
757  class shader;
758  
759  class vpass;
760  
761  class container_node;
762  class region_node;
763  
764  class node {
765  
766  protected:
767  	node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
prev()768  	: prev(), next(), parent(),
769  	  type(nt), subtype(nst), flags(flags),
770  	  pred(), dst(), src() {}
771  
~node()772  	virtual ~node() {};
773  
774  public:
775  	node *prev, *next;
776  	container_node *parent;
777  
778  	node_type type;
779  	node_subtype subtype;
780  	node_flags flags;
781  
782  	value *pred;
783  
784  	vvec dst;
785  	vvec src;
786  
is_valid()787  	virtual bool is_valid() { return true; }
788  	virtual bool accept(vpass &p, bool enter);
789  
790  	void insert_before(node *n);
791  	void insert_after(node *n);
792  	void replace_with(node *n);
793  	void remove();
794  
795  	virtual value_hash hash() const;
796  	value_hash hash_src() const;
797  
798  	virtual bool fold_dispatch(expr_handler *ex);
799  
is_container()800  	bool is_container() { return flags & NF_CONTAINER; }
801  
is_alu_packed()802  	bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; }
is_alu_inst()803  	bool is_alu_inst() { return subtype == NST_ALU_INST; }
is_alu_group()804  	bool is_alu_group() { return subtype == NST_ALU_GROUP; }
is_alu_clause()805  	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
806  
is_fetch_clause()807  	bool is_fetch_clause() {
808  		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE;
809  	}
810  
is_copy()811  	bool is_copy() { return subtype == NST_COPY; }
is_copy_mov()812  	bool is_copy_mov() { return flags & NF_COPY_MOV; }
is_any_alu()813  	bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); }
814  
is_fetch_inst()815  	bool is_fetch_inst() { return subtype == NST_FETCH_INST; }
is_cf_inst()816  	bool is_cf_inst() { return subtype == NST_CF_INST; }
817  
is_region()818  	bool is_region() { return type == NT_REGION; }
is_depart()819  	bool is_depart() { return type == NT_DEPART; }
is_repeat()820  	bool is_repeat() { return type == NT_REPEAT; }
is_if()821  	bool is_if() { return type == NT_IF; }
is_bb()822  	bool is_bb() { return subtype == NST_BB; }
823  
is_phi()824  	bool is_phi() { return subtype == NST_PHI; }
825  
is_dead()826  	bool is_dead() { return flags & NF_DEAD; }
827  
828  	bool is_cf_op(unsigned op);
829  	bool is_alu_op(unsigned op);
830  	bool is_fetch_op(unsigned op);
831  
832  	unsigned cf_op_flags();
833  	unsigned alu_op_flags();
834  	unsigned alu_op_slot_flags();
835  	unsigned fetch_op_flags();
836  
837  	bool is_mova();
838  	bool is_pred_set();
839  
vec_uses_ar(vvec & vv)840  	bool vec_uses_ar(vvec &vv) {
841  		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
842  			value *v = *I;
843  			if (v && v->rel && !v->rel->is_const())
844  				return true;
845  		}
846  		return false;
847  	}
848  
uses_ar()849  	bool uses_ar() {
850  		return vec_uses_ar(dst) || vec_uses_ar(src);
851  	}
852  
853  
854  	region_node* get_parent_region();
855  
856  	friend class shader;
857  };
858  
859  class container_node : public node {
860  public:
861  
862  	container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST,
863  	               node_flags flags = NF_EMPTY)
864  	: node(nt, nst, flags | NF_CONTAINER), first(), last(),
865  	  live_after(), live_before() {}
866  
867  	// child items list
868  	node *first, *last;
869  
870  	val_set live_after;
871  	val_set live_before;
872  
873  	class iterator {
874  		node *p;
875  	public:
p(pp)876  		iterator(node *pp = NULL) : p(pp) {}
877  		iterator & operator ++() { p = p->next; return *this;}
878  		iterator & operator --() { p = p->prev; return *this;}
879  		node* operator *() { return p; }
880  		node* operator ->() { return p; }
advance(int n)881  		const iterator advance(int n) {
882  			if (!n) return *this;
883  			iterator I(p);
884  			if (n > 0) while (n--) ++I;
885  			else while (n++) --I;
886  			return I;
887  		}
888  		const iterator operator +(int n) { return advance(n); }
889  		const iterator operator -(int n) { return advance(-n); }
890  		bool operator !=(const iterator &i) { return p != i.p; }
891  		bool operator ==(const iterator &i) { return p == i.p; }
892  	};
893  
894  	class riterator {
895  		iterator i;
896  	public:
i(p)897  		riterator(node *p = NULL) : i(p) {}
898  		riterator & operator ++() { --i; return *this;}
899  		riterator & operator --() { ++i; return *this;}
900  		node* operator *() { return *i; }
901  		node* operator ->() { return *i; }
902  		bool operator !=(const riterator &r) { return i != r.i; }
903  		bool operator ==(const riterator &r) { return i == r.i; }
904  	};
905  
begin()906  	iterator begin() { return first; }
end()907  	iterator end() { return NULL; }
rbegin()908  	riterator rbegin() { return last; }
rend()909  	riterator rend() { return NULL; }
910  
empty()911  	bool empty() { assert(first != NULL || first == last); return !first; }
912  	unsigned count();
913  
914  	// used with node containers that represent shceduling queues
915  	// ignores copies and takes into account alu_packed_node items
916  	unsigned real_alu_count();
917  
918  	void push_back(node *n);
919  	void push_front(node *n);
920  
921  	void insert_node_before(node *s, node *n);
922  	void insert_node_after(node *s, node *n);
923  
924  	void append_from(container_node *c);
925  
926  	// remove range [b..e) from some container and assign to this container
927  	void move(iterator b, iterator e);
928  
929  	void expand();
930  	void expand(container_node *n);
931  	void remove_node(node *n);
932  
933  	node *cut(iterator b, iterator e);
934  
clear()935  	void clear() { first = last = NULL; }
936  
is_valid()937  	virtual bool is_valid() { return true; }
938  	virtual bool accept(vpass &p, bool enter);
939  	virtual bool fold_dispatch(expr_handler *ex);
940  
front()941  	node* front() { return first; }
back()942  	node* back() { return last; }
943  
944  	void collect_stats(node_stats &s);
945  
946  	friend class shader;
947  
948  
949  };
950  
951  typedef container_node::iterator node_iterator;
952  typedef container_node::riterator node_riterator;
953  
954  class alu_group_node : public container_node {
955  protected:
alu_group_node()956  	alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {}
957  public:
958  
959  	std::vector<literal> literals;
960  
is_valid()961  	virtual bool is_valid() { return subtype == NST_ALU_GROUP; }
962  	virtual bool accept(vpass &p, bool enter);
963  
964  
literal_chan(literal l)965  	unsigned literal_chan(literal l) {
966  		std::vector<literal>::iterator F =
967  				std::find(literals.begin(), literals.end(), l);
968  		assert(F != literals.end());
969  		return F - literals.begin();
970  	}
971  
972  	friend class shader;
973  };
974  
975  class cf_node : public container_node {
976  protected:
cf_node()977  	cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
978  		jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); };
979  public:
980  	bc_cf bc;
981  
982  	cf_node *jump_target;
983  	bool jump_after_target;
984  
is_valid()985  	virtual bool is_valid() { return subtype == NST_CF_INST; }
986  	virtual bool accept(vpass &p, bool enter);
987  	virtual bool fold_dispatch(expr_handler *ex);
988  
jump(cf_node * c)989  	void jump(cf_node *c) { jump_target = c; jump_after_target = false; }
jump_after(cf_node * c)990  	void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; }
991  
992  	friend class shader;
993  };
994  
995  class alu_node : public node {
996  protected:
alu_node()997  	alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); };
998  public:
999  	bc_alu bc;
1000  
is_valid()1001  	virtual bool is_valid() { return subtype == NST_ALU_INST; }
1002  	virtual bool accept(vpass &p, bool enter);
1003  	virtual bool fold_dispatch(expr_handler *ex);
1004  
forced_bank_swizzle()1005  	unsigned forced_bank_swizzle() {
1006  		return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ?
1007  				VEC_210 : 0;
1008  	}
1009  
1010  	// return param index + 1 if instruction references interpolation param,
1011  	// otherwise 0
1012  	unsigned interp_param();
1013  
1014  	alu_group_node *get_alu_group_node();
1015  
1016  	friend class shader;
1017  };
1018  
1019  // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later)
1020  class alu_packed_node : public container_node {
1021  protected:
alu_packed_node()1022  	alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {}
1023  public:
1024  
op_ptr()1025  	const alu_op_info* op_ptr() {
1026  		return static_cast<alu_node*>(first)->bc.op_ptr;
1027  	}
op()1028  	unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
1029  	void init_args(bool repl);
1030  
is_valid()1031  	virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
1032  	virtual bool accept(vpass &p, bool enter);
1033  	virtual bool fold_dispatch(expr_handler *ex);
1034  
1035  	unsigned get_slot_mask();
1036  	void update_packed_items(sb_context &ctx);
1037  
1038  	friend class shader;
1039  };
1040  
1041  class fetch_node : public node {
1042  protected:
fetch_node()1043  	fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); };
1044  public:
1045  	bc_fetch bc;
1046  
is_valid()1047  	virtual bool is_valid() { return subtype == NST_FETCH_INST; }
1048  	virtual bool accept(vpass &p, bool enter);
1049  	virtual bool fold_dispatch(expr_handler *ex);
1050  
uses_grad()1051  	bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; }
1052  
1053  	friend class shader;
1054  };
1055  
1056  class region_node;
1057  
1058  class repeat_node : public container_node {
1059  protected:
repeat_node(region_node * target,unsigned id)1060  	repeat_node(region_node *target, unsigned id)
1061  	: container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {}
1062  public:
1063  	region_node *target;
1064  	unsigned rep_id;
1065  
1066  	virtual bool accept(vpass &p, bool enter);
1067  
1068  	friend class shader;
1069  };
1070  
1071  class depart_node : public container_node {
1072  protected:
depart_node(region_node * target,unsigned id)1073  	depart_node(region_node *target, unsigned id)
1074  	: container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {}
1075  public:
1076  	region_node *target;
1077  	unsigned dep_id;
1078  
1079  	virtual bool accept(vpass &p, bool enter);
1080  
1081  	friend class shader;
1082  };
1083  
1084  class if_node : public container_node {
1085  protected:
if_node()1086  	if_node() : container_node(NT_IF, NST_LIST), cond() {};
1087  public:
1088  	value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx
1089  
1090  	virtual bool accept(vpass &p, bool enter);
1091  
1092  	friend class shader;
1093  };
1094  
1095  typedef std::vector<depart_node*> depart_vec;
1096  typedef std::vector<repeat_node*> repeat_vec;
1097  
1098  class region_node : public container_node {
1099  protected:
region_node(unsigned id)1100  	region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
1101  			loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop()
1102  			{}
1103  public:
1104  	unsigned region_id;
1105  
1106  	container_node *loop_phi;
1107  	container_node *phi;
1108  
1109  	val_set vars_defined;
1110  
1111  	depart_vec departs;
1112  	repeat_vec repeats;
1113  
1114  	// true if region was created for loop in the parser, sometimes repeat_node
1115  	// may be optimized away so we need to remember this information
1116  	bool src_loop;
1117  
1118  	virtual bool accept(vpass &p, bool enter);
1119  
dep_count()1120  	unsigned dep_count() { return departs.size(); }
rep_count()1121  	unsigned rep_count() { return repeats.size() + 1; }
1122  
is_loop()1123  	bool is_loop() { return src_loop || !repeats.empty(); }
1124  
get_entry_code_location()1125  	container_node* get_entry_code_location() {
1126  		node *p = first;
1127  		while (p && (p->is_depart() || p->is_repeat()))
1128  			p = static_cast<container_node*>(p)->first;
1129  
1130  		container_node *c = static_cast<container_node*>(p);
1131  		if (c->is_bb())
1132  			return c;
1133  		else
1134  			return c->parent;
1135  	}
1136  
1137  	void expand_depart(depart_node *d);
1138  	void expand_repeat(repeat_node *r);
1139  
1140  	friend class shader;
1141  };
1142  
1143  class bb_node : public container_node {
1144  protected:
bb_node(unsigned id,unsigned loop_level)1145  	bb_node(unsigned id, unsigned loop_level)
1146  		: container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {}
1147  public:
1148  	unsigned id;
1149  	unsigned loop_level;
1150  
1151  	virtual bool accept(vpass &p, bool enter);
1152  
1153  	friend class shader;
1154  };
1155  
1156  
1157  typedef std::vector<region_node*> regions_vec;
1158  typedef std::vector<bb_node*> bbs_vec;
1159  typedef std::list<node*> sched_queue;
1160  typedef sched_queue::iterator sq_iterator;
1161  typedef std::vector<node*> node_vec;
1162  typedef std::list<node*> node_list;
1163  typedef std::set<node*> node_set;
1164  
1165  
1166  
1167  } // namespace r600_sb
1168  
1169  #endif /* R600_SB_IR_H_ */
1170