• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * on the rights to use, copy, modify, merge, publish, distribute, sub
8   * license, and/or sell copies of the Software, and to permit persons to whom
9   * the Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice (including the next
12   * paragraph) shall be included in all copies or substantial portions of the
13   * Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21   * USE OR OTHER DEALINGS IN THE SOFTWARE.
22   *
23   * Authors:
24   *      Vadim Girlin
25   */
26  
27  #define IFC_DEBUG 0
28  
29  #if IFC_DEBUG
30  #define IFC_DUMP(q) do { q } while (0)
31  #else
32  #define IFC_DUMP(q)
33  #endif
34  
35  #include "sb_shader.h"
36  #include "sb_pass.h"
37  
38  namespace r600_sb {
39  
run()40  int if_conversion::run() {
41  
42  	regions_vec &rv = sh.get_regions();
43  
44  	unsigned converted = 0;
45  
46  	for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend();
47  			I != E; I = N) {
48  		N = I; ++N;
49  
50  		region_node *r = *I;
51  		if (run_on(r)) {
52  			rv.erase(I.base() - 1);
53  			++converted;
54  		}
55  	}
56  	return 0;
57  }
58  
convert_kill_instructions(region_node * r,value * em,bool branch,container_node * c)59  void if_conversion::convert_kill_instructions(region_node *r,
60                                                value *em, bool branch,
61                                                container_node *c) {
62  	value *cnd = NULL;
63  
64  	for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
65  		N = I + 1;
66  
67  		if (!I->is_alu_inst())
68  			continue;
69  
70  		alu_node *a = static_cast<alu_node*>(*I);
71  		unsigned flags = a->bc.op_ptr->flags;
72  
73  		if (!(flags & AF_KILL))
74  			continue;
75  
76  		// ignore predicated or non-const kill instructions
77  		if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
78  			continue;
79  
80  		literal l0 = a->src[0]->literal_value;
81  		literal l1 = a->src[1]->literal_value;
82  
83  		expr_handler::apply_alu_src_mod(a->bc, 0, l0);
84  		expr_handler::apply_alu_src_mod(a->bc, 1, l1);
85  
86  		if (expr_handler::evaluate_condition(flags, l0, l1)) {
87  			// kill with constant 'true' condition, we'll convert it to the
88  			// conditional kill outside of the if-then-else block
89  
90  			a->remove();
91  
92  			if (!cnd) {
93  				cnd = get_select_value_for_em(sh, em);
94  			} else {
95  				// more than one kill with the same condition, just remove it
96  				continue;
97  			}
98  
99  			r->insert_before(a);
100  			a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
101  
102  			a->src[0] = cnd;
103  			a->src[1] = sh.get_const_value(0);
104  			// clear modifiers
105  			memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
106  			memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
107  		} else {
108  			// kill with constant 'false' condition, this shouldn't happen
109  			// but remove it anyway
110  			a->remove();
111  		}
112  	}
113  }
114  
check_and_convert(region_node * r)115  bool if_conversion::check_and_convert(region_node *r) {
116  
117  	depart_node *nd1 = static_cast<depart_node*>(r->first);
118  	if (!nd1->is_depart() || nd1->target != r)
119  		return false;
120  	if_node *nif = static_cast<if_node*>(nd1->first);
121  	if (!nif->is_if())
122  		return false;
123  	depart_node *nd2 = static_cast<depart_node*>(nif->first);
124  	if (!nd2->is_depart() || nd2->target != r)
125  		return false;
126  
127  	value* &em = nif->cond;
128  
129  	node_stats s;
130  
131  	r->collect_stats(s);
132  
133  	IFC_DUMP(
134  		sblog << "ifcvt: region " << r->region_id << " :\n";
135  		s.dump();
136  	);
137  
138  	if (s.region_count || s.fetch_count || s.alu_kill_count ||
139  			s.if_count != 1 || s.repeat_count)
140  		return false;
141  
142  	unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
143  
144  	// if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
145  	// JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
146  	// are eliminated. According to the docs, cost of CF instruction is
147  	// equal to ~40 ALU VLIW instructions (instruction groups),
148  	// so we have eliminated cost equal to ~120 groups in total.
149  	// Let's also assume that we have avg 3 ALU instructions per group,
150  	// This means that potential eliminated cost is about 360 single alu inst.
151  	// On the other hand, we are speculatively executing conditional code now,
152  	// so we are increasing the cost in some cases. In the worst case, we'll
153  	// have to execute real_alu_count additional alu instructions instead of
154  	// jumping over them. Let's assume for now that average added cost is
155  	//
156  	// (0.9 * real_alu_count)
157  	//
158  	// So we should perform if_conversion if
159  	//
160  	// (0.9 * real_alu_count) < 360, or
161  	//
162  	// real_alu_count < 400
163  	//
164  	// So if real_alu_count is more than 400, than we think that if_conversion
165  	// doesn't make sense.
166  
167  	// FIXME: We can use more precise heuristic, taking into account sizes of
168  	// the branches and their probability instead of total size.
169  	// Another way to improve this is to consider the number of the groups
170  	// instead of the number of instructions (taking into account actual VLIW
171  	// packing).
172  	// (Currently we don't know anything about packing at this stage, but
173  	// probably we can make some more precise estimations anyway)
174  
175  	if (real_alu_count > 400)
176  		return false;
177  
178  	IFC_DUMP( sblog << "if_cvt: processing...\n"; );
179  
180  	value *select = get_select_value_for_em(sh, em);
181  
182  	if (!select)
183  		return false;
184  
185  	for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) {
186  		node *n = *I;
187  
188  		alu_node *ns = convert_phi(select, n);
189  
190  		if (ns)
191  			r->insert_after(ns);
192  	}
193  
194  	nd2->expand();
195  	nif->expand();
196  	nd1->expand();
197  	r->expand();
198  
199  	return true;
200  }
201  
run_on(region_node * r)202  bool if_conversion::run_on(region_node* r) {
203  
204  	if (r->dep_count() != 2 || r->rep_count() != 1)
205  		return false;
206  
207  	depart_node *nd1 = static_cast<depart_node*>(r->first);
208  	if (!nd1->is_depart())
209  		return false;
210  	if_node *nif = static_cast<if_node*>(nd1->first);
211  	if (!nif->is_if())
212  		return false;
213  	depart_node *nd2 = static_cast<depart_node*>(nif->first);
214  	if (!nd2->is_depart())
215  		return false;
216  
217  	value* &em = nif->cond;
218  
219  	convert_kill_instructions(r, em, true, nd2);
220  	convert_kill_instructions(r, em, false, nd1);
221  
222  	if (check_and_convert(r))
223  		return true;
224  
225  	if (nd2->empty() && nif->next) {
226  		// empty true branch, non-empty false branch
227  		// we'll invert it to get rid of 'else'
228  
229  		assert(em && em->def);
230  
231  		alu_node *predset = static_cast<alu_node*>(em->def);
232  
233  		// create clone of PREDSET instruction with inverted condition.
234  		// PREDSET has 3 dst operands in our IR (value written to gpr,
235  		// predicate value and exec mask value), we'll split it such that
236  		// new PREDSET will define exec mask value only, and two others will
237  		// be defined in the old PREDSET (if they are not used then DCE will
238  		// simply remove old PREDSET).
239  
240  		alu_node *newpredset = sh.clone(predset);
241  		predset->insert_after(newpredset);
242  
243  		predset->dst[2] = NULL;
244  
245  		newpredset->dst[0] = NULL;
246  		newpredset->dst[1] = NULL;
247  
248  		em->def = newpredset;
249  
250  		unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
251  		unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
252  		bool swapargs = false;
253  
254  		cc = invert_setcc_condition(cc, swapargs);
255  
256  		if (swapargs) {
257  			std::swap(newpredset->src[0], newpredset->src[1]);
258  			std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
259  		}
260  
261  		unsigned newopcode = get_predsetcc_op(cc, cmptype);
262  		newpredset->bc.set_op(newopcode);
263  
264  		// move the code from the 'false' branch ('else') to the 'true' branch
265  		nd2->move(nif->next, NULL);
266  
267  		// swap phi operands
268  		for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
269  				++I) {
270  			node *p = *I;
271  			assert(p->src.size() == 2);
272  			std::swap(p->src[0], p->src[1]);
273  		}
274  	}
275  
276  	return false;
277  }
278  
convert_phi(value * select,node * phi)279  alu_node* if_conversion::convert_phi(value* select, node* phi) {
280  	assert(phi->dst.size() == 1 || phi->src.size() == 2);
281  
282  	value *d = phi->dst[0];
283  	value *v1 = phi->src[0];
284  	value *v2 = phi->src[1];
285  
286  	assert(d);
287  
288  	if (!d->is_any_gpr())
289  		return NULL;
290  
291  	if (v1->is_undef()) {
292  		if (v2->is_undef()) {
293  			return NULL;
294  		} else {
295  			return sh.create_mov(d, v2);
296  		}
297  	} else if (v2->is_undef())
298  		return sh.create_mov(d, v1);
299  
300  	alu_node* n = sh.create_alu();
301  
302  	n->bc.set_op(ALU_OP3_CNDE_INT);
303  	n->dst.push_back(d);
304  	n->src.push_back(select);
305  	n->src.push_back(v1);
306  	n->src.push_back(v2);
307  
308  	return n;
309  }
310  
311  } // namespace r600_sb
312