• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef SB_PASS_H_
28 #define SB_PASS_H_
29 
30 #include <stack>
31 
32 namespace r600_sb {
33 
34 class pass {
35 protected:
36 	sb_context &ctx;
37 	shader &sh;
38 
39 public:
40 	pass(shader &s);
41 
42 	virtual int run();
43 
~pass()44 	virtual ~pass() {}
45 };
46 
47 class vpass : public pass {
48 
49 public:
50 
vpass(shader & s)51 	vpass(shader &s) : pass(s) {}
52 
53 	virtual int init();
54 	virtual int done();
55 
56 	virtual int run();
57 	virtual void run_on(container_node &n);
58 
59 	virtual bool visit(node &n, bool enter);
60 	virtual bool visit(container_node &n, bool enter);
61 	virtual bool visit(alu_group_node &n, bool enter);
62 	virtual bool visit(cf_node &n, bool enter);
63 	virtual bool visit(alu_node &n, bool enter);
64 	virtual bool visit(alu_packed_node &n, bool enter);
65 	virtual bool visit(fetch_node &n, bool enter);
66 	virtual bool visit(region_node &n, bool enter);
67 	virtual bool visit(repeat_node &n, bool enter);
68 	virtual bool visit(depart_node &n, bool enter);
69 	virtual bool visit(if_node &n, bool enter);
70 	virtual bool visit(bb_node &n, bool enter);
71 
72 };
73 
74 class rev_vpass : public vpass {
75 
76 public:
rev_vpass(shader & s)77 	rev_vpass(shader &s) : vpass(s) {}
78 
79 	virtual void run_on(container_node &n);
80 };
81 
82 
83 // =================== PASSES
84 
85 class bytecode;
86 
87 class bc_dump : public vpass {
88 	using vpass::visit;
89 
90 	uint32_t *bc_data;
91 	unsigned ndw;
92 
93 	unsigned id;
94 
95 	unsigned new_group, group_index;
96 
97 public:
98 
99 	bc_dump(shader &s, bytecode *bc = NULL);
100 
bc_dump(shader & s,uint32_t * bc_ptr,unsigned ndw)101 	bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
102 		vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
103 
104 	virtual int init();
105 	virtual int done();
106 
107 	virtual bool visit(cf_node &n, bool enter);
108 	virtual bool visit(alu_node &n, bool enter);
109 	virtual bool visit(fetch_node &n, bool enter);
110 
111 	void dump_dw(unsigned dw_id, unsigned count = 2);
112 
113 	void dump(cf_node& n);
114 	void dump(alu_node& n);
115 	void dump(fetch_node& n);
116 };
117 
118 
119 class dce_cleanup : public vpass {
120 	using vpass::visit;
121 
122 	bool remove_unused;
123 
124 public:
125 
dce_cleanup(shader & s)126 	dce_cleanup(shader &s) : vpass(s),
127 		remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
128 
129 	virtual int run();
130 
131 	virtual bool visit(node &n, bool enter);
132 	virtual bool visit(alu_group_node &n, bool enter);
133 	virtual bool visit(cf_node &n, bool enter);
134 	virtual bool visit(alu_node &n, bool enter);
135 	virtual bool visit(alu_packed_node &n, bool enter);
136 	virtual bool visit(fetch_node &n, bool enter);
137 	virtual bool visit(region_node &n, bool enter);
138 	virtual bool visit(container_node &n, bool enter);
139 
140 private:
141 
142 	void cleanup_dst(node &n);
143 	bool cleanup_dst_vec(vvec &vv);
144 
145 	// Did we alter/remove nodes during a single pass?
146 	bool nodes_changed;
147 };
148 
149 
150 class def_use : public pass {
151 
152 public:
153 
def_use(shader & sh)154 	def_use(shader &sh) : pass(sh) {}
155 
156 	virtual int run();
157 	void run_on(node *n, bool defs);
158 
159 private:
160 
161 	void process_uses(node *n);
162 	void process_defs(node *n, vvec &vv, bool arr_def);
163 	void process_phi(container_node *c, bool defs, bool uses);
164 };
165 
166 
167 
168 class dump : public vpass {
169 	using vpass::visit;
170 
171 	int level;
172 
173 public:
174 
dump(shader & s)175 	dump(shader &s) : vpass(s), level(0) {}
176 
177 	virtual bool visit(node &n, bool enter);
178 	virtual bool visit(container_node &n, bool enter);
179 	virtual bool visit(alu_group_node &n, bool enter);
180 	virtual bool visit(cf_node &n, bool enter);
181 	virtual bool visit(alu_node &n, bool enter);
182 	virtual bool visit(alu_packed_node &n, bool enter);
183 	virtual bool visit(fetch_node &n, bool enter);
184 	virtual bool visit(region_node &n, bool enter);
185 	virtual bool visit(repeat_node &n, bool enter);
186 	virtual bool visit(depart_node &n, bool enter);
187 	virtual bool visit(if_node &n, bool enter);
188 	virtual bool visit(bb_node &n, bool enter);
189 
190 
191 	static void dump_op(node &n, const char *name);
192 	static void dump_vec(const vvec & vv);
193 	static void dump_set(shader &sh, val_set & v);
194 
195 	static void dump_rels(vvec & vv);
196 
197 	static void dump_val(value *v);
198 	static void dump_op(node *n);
199 
200 	static void dump_op_list(container_node *c);
201 	static void dump_queue(sched_queue &q);
202 
203 	static void dump_alu(alu_node *n);
204 
205 private:
206 
207 	void indent();
208 
209 	void dump_common(node &n);
210 	void dump_flags(node &n);
211 
212 	void dump_live_values(container_node &n, bool before);
213 };
214 
215 
216 // Global Code Motion
217 
218 class gcm : public pass {
219 
220 	sched_queue bu_ready[SQ_NUM];
221 	sched_queue bu_ready_next[SQ_NUM];
222 	sched_queue bu_ready_early[SQ_NUM];
223 	sched_queue ready;
224 	sched_queue ready_above;
225 
226 	container_node pending;
227 
228 	struct op_info {
229 		bb_node* top_bb;
230 		bb_node* bottom_bb;
op_infoop_info231 		op_info() : top_bb(), bottom_bb() {}
232 	};
233 
234 	typedef std::map<node*, op_info> op_info_map;
235 
236 	typedef std::map<node*, unsigned> nuc_map;
237 
238 	op_info_map op_map;
239 	nuc_map uses;
240 
241 	typedef std::vector<nuc_map> nuc_stack;
242 
243 	nuc_stack nuc_stk;
244 	unsigned ucs_level;
245 
246 	bb_node * bu_bb;
247 
248 	vvec pending_defs;
249 
250 	node_list pending_nodes;
251 
252 	unsigned cur_sq;
253 
254 	// for register pressure tracking in bottom-up pass
255 	val_set live;
256 	int live_count;
257 
258 	static const int rp_threshold = 100;
259 
260 	bool pending_exec_mask_update;
261 
262 public:
263 
gcm(shader & sh)264 	gcm(shader &sh) : pass(sh),
265 		bu_ready(), bu_ready_next(), bu_ready_early(),
266 		ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
267 		bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
268 		live(), live_count(), pending_exec_mask_update() {}
269 
270 	virtual int run();
271 
272 private:
273 
274 	void collect_instructions(container_node *c, bool early_pass);
275 
276 	void sched_early(container_node *n);
277 	void td_sched_bb(bb_node *bb);
278 	bool td_is_ready(node *n);
279 	void td_release_uses(vvec &v);
280 	void td_release_val(value *v);
281 	void td_schedule(bb_node *bb, node *n);
282 
283 	void sched_late(container_node *n);
284 	void bu_sched_bb(bb_node *bb);
285 	void bu_release_defs(vvec &v, bool src);
286 	void bu_release_phi_defs(container_node *p, unsigned op);
287 	bool bu_is_ready(node *n);
288 	void bu_release_val(value *v);
289 	void bu_release_op(node * n);
290 	void bu_find_best_bb(node *n, op_info &oi);
291 	void bu_schedule(container_node *bb, node *n);
292 
293 	void push_uc_stack();
294 	void pop_uc_stack();
295 
296 	void init_def_count(nuc_map &m, container_node &s);
297 	void init_use_count(nuc_map &m, container_node &s);
298 	unsigned get_uc_vec(vvec &vv);
299 	unsigned get_dc_vec(vvec &vv, bool src);
300 
301 	void add_ready(node *n);
302 
303 	void dump_uc_stack();
304 
305 	unsigned real_alu_count(sched_queue &q, unsigned max);
306 
307 	// check if we have not less than threshold ready alu instructions
308 	bool check_alu_ready_count(unsigned threshold);
309 };
310 
311 
312 class gvn : public vpass {
313 	using vpass::visit;
314 
315 public:
316 
gvn(shader & sh)317 	gvn(shader &sh) : vpass(sh) {}
318 
319 	virtual bool visit(node &n, bool enter);
320 	virtual bool visit(cf_node &n, bool enter);
321 	virtual bool visit(alu_node &n, bool enter);
322 	virtual bool visit(alu_packed_node &n, bool enter);
323 	virtual bool visit(fetch_node &n, bool enter);
324 	virtual bool visit(region_node &n, bool enter);
325 
326 private:
327 
328 	void process_op(node &n, bool rewrite = true);
329 
330 	// returns true if the value was rewritten
331 	bool process_src(value* &v, bool rewrite);
332 
333 
334 	void process_alu_src_constants(node &n, value* &v);
335 };
336 
337 
338 class if_conversion : public pass {
339 
340 public:
341 
if_conversion(shader & sh)342 	if_conversion(shader &sh) : pass(sh) {}
343 
344 	virtual int run();
345 
346 	bool run_on(region_node *r);
347 
348 	void convert_kill_instructions(region_node *r, value *em, bool branch,
349 	                               container_node *c);
350 
351 	bool check_and_convert(region_node *r);
352 
353 	alu_node* convert_phi(value *select, node *phi);
354 
355 };
356 
357 
358 class liveness : public rev_vpass {
359 	using vpass::visit;
360 
361 	val_set live;
362 	bool live_changed;
363 
364 public:
365 
liveness(shader & s)366 	liveness(shader &s) : rev_vpass(s), live_changed(false) {}
367 
368 	virtual int init();
369 
370 	virtual bool visit(node &n, bool enter);
371 	virtual bool visit(bb_node &n, bool enter);
372 	virtual bool visit(container_node &n, bool enter);
373 	virtual bool visit(alu_group_node &n, bool enter);
374 	virtual bool visit(cf_node &n, bool enter);
375 	virtual bool visit(alu_node &n, bool enter);
376 	virtual bool visit(alu_packed_node &n, bool enter);
377 	virtual bool visit(fetch_node &n, bool enter);
378 	virtual bool visit(region_node &n, bool enter);
379 	virtual bool visit(repeat_node &n, bool enter);
380 	virtual bool visit(depart_node &n, bool enter);
381 	virtual bool visit(if_node &n, bool enter);
382 
383 private:
384 
385 	void update_interferences();
386 	void process_op(node &n);
387 
388 	bool remove_val(value *v);
389 	bool remove_vec(vvec &v);
390 	bool process_outs(node& n);
391 	void process_ins(node& n);
392 
393 	void process_phi_outs(container_node *phi);
394 	void process_phi_branch(container_node *phi, unsigned id);
395 
396 	bool process_maydef(value *v);
397 
398 	bool add_vec(vvec &vv, bool src);
399 
400 	void update_src_vec(vvec &vv, bool src);
401 };
402 
403 
404 struct bool_op_info {
405 	bool invert;
406 	unsigned int_cvt;
407 
408 	alu_node *n;
409 };
410 
411 class peephole : public pass {
412 
413 public:
414 
peephole(shader & sh)415 	peephole(shader &sh) : pass(sh) {}
416 
417 	virtual int run();
418 
419 	void run_on(container_node *c);
420 
421 	void optimize_cc_op(alu_node *a);
422 
423 	void optimize_cc_op2(alu_node *a);
424 	void optimize_CNDcc_op(alu_node *a);
425 
426 	bool get_bool_op_info(value *b, bool_op_info& bop);
427 	bool get_bool_flt_to_int_source(alu_node* &a);
428 	void convert_float_setcc(alu_node *f2i, alu_node *s);
429 };
430 
431 
432 class psi_ops : public rev_vpass {
433 	using rev_vpass::visit;
434 
435 public:
436 
psi_ops(shader & s)437 	psi_ops(shader &s) : rev_vpass(s) {}
438 
439 	virtual bool visit(node &n, bool enter);
440 	virtual bool visit(alu_node &n, bool enter);
441 
442 	bool try_inline(node &n);
443 	bool try_reduce(node &n);
444 	bool eliminate(node &n);
445 
446 	void unpredicate(node *n);
447 };
448 
449 
450 // check correctness of the generated code, e.g.:
451 // - expected source operand value is the last value written to its gpr,
452 // - all arguments of phi node should be allocated to the same gpr,
453 // TODO other tests
454 class ra_checker : public pass {
455 
456 	typedef std::map<sel_chan, value *> reg_value_map;
457 
458 	typedef std::vector<reg_value_map> regmap_stack;
459 
460 	regmap_stack rm_stack;
461 	unsigned rm_stk_level;
462 
463 	value* prev_dst[5];
464 
465 public:
466 
ra_checker(shader & sh)467 	ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
468 
469 	virtual int run();
470 
471 	void run_on(container_node *c);
472 
473 	void dump_error(const error_info &e);
474 	void dump_all_errors();
475 
476 private:
477 
rmap()478 	reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
479 
480 	void push_stack();
481 	void pop_stack();
482 
483 	// when going out of the alu clause, values in the clause temporary gprs,
484 	// AR, predicate values, PS/PV are destroyed
485 	void kill_alu_only_regs();
486 	void error(node *n, unsigned id, std::string msg);
487 
488 	void check_phi_src(container_node *p, unsigned id);
489 	void process_phi_dst(container_node *p);
490 	void check_alu_group(alu_group_node *g);
491 	void process_op_dst(node *n);
492 	void check_op_src(node *n);
493 	void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
494 	void check_value_gpr(node *n, unsigned id, value *v);
495 };
496 
497 // =======================================
498 
499 
500 class ra_coalesce : public pass {
501 
502 public:
503 
ra_coalesce(shader & sh)504 	ra_coalesce(shader &sh) : pass(sh) {}
505 
506 	virtual int run();
507 };
508 
509 
510 
511 // =======================================
512 
513 class ra_init : public pass {
514 
515 public:
516 
ra_init(shader & sh)517 	ra_init(shader &sh) : pass(sh), prev_chans() {
518 
519 		// The parameter below affects register channels distribution.
520 		// For cayman (VLIW-4) we're trying to distribute the channels
521 		// uniformly, this means significantly better alu slots utilization
522 		// at the expense of higher gpr usage. Hopefully this will improve
523 		// performance, though it has to be proven with real benchmarks yet.
524 		// For VLIW-5 this method could also slightly improve slots
525 		// utilization, but increased register pressure seems more significant
526 		// and overall performance effect is negative according to some
527 		// benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
528 		// really need it because trans slot (unrestricted by register write
529 		// channel) allows to consume most deviations from uniform channel
530 		// distribution.
531 		// Value 3 means that for new allocation we'll use channel that differs
532 		// from 3 last used channels. 0 for VLIW-5 effectively turns this off.
533 
534 		ra_tune = sh.get_ctx().is_cayman() ? 3 : 0;
535 	}
536 
537 	virtual int run();
538 
539 private:
540 
541 	unsigned prev_chans;
542 	unsigned ra_tune;
543 
544 	void add_prev_chan(unsigned chan);
545 	unsigned get_preferable_chan_mask();
546 
547 	void ra_node(container_node *c);
548 	void process_op(node *n);
549 
550 	void color(value *v);
551 
552 	void color_bs_constraint(ra_constraint *c);
553 
554 	void assign_color(value *v, sel_chan c);
555 	void alloc_arrays();
556 };
557 
558 // =======================================
559 
560 class ra_split : public pass {
561 
562 public:
563 
ra_split(shader & sh)564 	ra_split(shader &sh) : pass(sh) {}
565 
566 	virtual int run();
567 
568 	void split(container_node *n);
569 	void split_op(node *n);
570 	void split_alu_packed(alu_packed_node *n);
571 	void split_vector_inst(node *n);
572 
573 	void split_packed_ins(alu_packed_node *n);
574 
575 #if 0
576 	void split_pinned_outs(node *n);
577 #endif
578 
579 	void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
580 
581 	void split_phi_src(container_node *loc, container_node *c, unsigned id,
582 	                   bool loop);
583 	void split_phi_dst(node *loc, container_node *c, bool loop);
584 	void init_phi_constraints(container_node *c);
585 };
586 
587 
588 
589 class ssa_prepare : public vpass {
590 	using vpass::visit;
591 
592 	typedef std::vector<val_set> vd_stk;
593 	vd_stk stk;
594 
595 	unsigned level;
596 
597 public:
ssa_prepare(shader & s)598 	ssa_prepare(shader &s) : vpass(s), level(0) {}
599 
600 	virtual bool visit(cf_node &n, bool enter);
601 	virtual bool visit(alu_node &n, bool enter);
602 	virtual bool visit(fetch_node &n, bool enter);
603 	virtual bool visit(region_node &n, bool enter);
604 	virtual bool visit(repeat_node &n, bool enter);
605 	virtual bool visit(depart_node &n, bool enter);
606 
607 private:
608 
push_stk()609 	void push_stk() {
610 		++level;
611 		if (level + 1 > stk.size())
612 			stk.resize(level+1);
613 		else
614 			stk[level].clear();
615 	}
pop_stk()616 	void pop_stk() {
617 		assert(level);
618 		--level;
619 		stk[level].add_set(stk[level + 1]);
620 	}
621 
622 	void add_defs(node &n);
623 
cur_set()624 	val_set & cur_set() { return stk[level]; }
625 
626 	container_node* create_phi_nodes(int count);
627 };
628 
629 class ssa_rename : public vpass {
630 	using vpass::visit;
631 
632 	typedef sb_map<value*, unsigned> def_map;
633 
634 	def_map def_count;
635 	std::stack<def_map> rename_stack;
636 
637 	typedef std::map<uint32_t, value*> val_map;
638 	val_map values;
639 
640 public:
641 
ssa_rename(shader & s)642 	ssa_rename(shader &s) : vpass(s) {}
643 
644 	virtual int init();
645 
646 	virtual bool visit(container_node &n, bool enter);
647 	virtual bool visit(node &n, bool enter);
648 	virtual bool visit(alu_group_node &n, bool enter);
649 	virtual bool visit(cf_node &n, bool enter);
650 	virtual bool visit(alu_node &n, bool enter);
651 	virtual bool visit(alu_packed_node &n, bool enter);
652 	virtual bool visit(fetch_node &n, bool enter);
653 	virtual bool visit(region_node &n, bool enter);
654 	virtual bool visit(repeat_node &n, bool enter);
655 	virtual bool visit(depart_node &n, bool enter);
656 	virtual bool visit(if_node &n, bool enter);
657 
658 private:
659 
660 	void push(node *phi);
661 	void pop();
662 
663 	unsigned get_index(def_map& m, value* v);
664 	void set_index(def_map& m, value* v, unsigned index);
665 	unsigned new_index(def_map& m, value* v);
666 
667 	value* rename_use(node *n, value* v);
668 	value* rename_def(node *def, value* v);
669 
670 	void rename_src_vec(node *n, vvec &vv, bool src);
671 	void rename_dst_vec(node *def, vvec &vv, bool set_def);
672 
673 	void rename_src(node *n);
674 	void rename_dst(node *n);
675 
676 	void rename_phi_args(container_node *phi, unsigned op, bool def);
677 
678 	void rename_virt(node *n);
679 	void rename_virt_val(node *n, value *v);
680 };
681 
682 class bc_finalizer : public pass {
683 
684 	cf_node *last_export[EXP_TYPE_COUNT];
685 	cf_node *last_cf;
686 
687 	unsigned ngpr;
688 	unsigned nstack;
689 
690 public:
691 
bc_finalizer(shader & sh)692 	bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
693 		nstack() {}
694 
695 	virtual int run();
696 
697 	void finalize_loop(region_node *r);
698 	void finalize_if(region_node *r);
699 
700 	void run_on(container_node *c);
701 
702 	void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
703 	void finalize_alu_group(alu_group_node *g, node *prev_node);
704 	bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
705 
706 	void emit_set_grad(fetch_node* f);
707 	void finalize_fetch(fetch_node *f);
708 
709 	void finalize_cf(cf_node *c);
710 
711 	sel_chan translate_kcache(cf_node *alu, value *v);
712 
713 	void update_ngpr(unsigned gpr);
714 	void update_nstack(region_node *r, unsigned add = 0);
715 
716 	unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
717 	                         unsigned add = 0);
718 
719 	void cf_peephole();
720 
721 private:
722 	void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start);
723 	void emit_set_texture_offsets(fetch_node &f);
724 };
725 
726 
727 } // namespace r600_sb
728 
729 #endif /* SB_PASS_H_ */
730