• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define PPH_DEBUG 0
28 
29 #if PPH_DEBUG
30 #define PPH_DUMP(q) do { q } while (0)
31 #else
32 #define PPH_DUMP(q)
33 #endif
34 
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37 
38 namespace r600_sb {
39 
run()40 int peephole::run() {
41 
42 	run_on(sh.root);
43 
44 	return 0;
45 }
46 
run_on(container_node * c)47 void peephole::run_on(container_node* c) {
48 
49 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
50 		node *n = *I;
51 
52 		if (n->is_container())
53 			run_on(static_cast<container_node*>(n));
54 		else {
55 
56 			if (n->is_alu_inst()) {
57 				alu_node *a = static_cast<alu_node*>(n);
58 
59 				if (a->bc.op_ptr->flags &
60 						(AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
61 					optimize_cc_op(a);
62 				} else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
63 
64 					alu_node *s = a;
65 					if (get_bool_flt_to_int_source(s)) {
66 						convert_float_setcc(a, s);
67 					}
68 				}
69 			}
70 		}
71 	}
72 }
73 
optimize_cc_op(alu_node * a)74 void peephole::optimize_cc_op(alu_node* a) {
75 	unsigned aflags = a->bc.op_ptr->flags;
76 
77 	if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
78 		optimize_cc_op2(a);
79 	} else if (aflags & AF_CMOV) {
80 		optimize_CNDcc_op(a);
81 	}
82 }
83 
convert_float_setcc(alu_node * f2i,alu_node * s)84 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
85 	alu_node *ns = sh.clone(s);
86 
87 	ns->dst[0] = f2i->dst[0];
88 	ns->dst[0]->def = ns;
89 	ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
90 	f2i->insert_after(ns);
91 	f2i->remove();
92 }
93 
optimize_cc_op2(alu_node * a)94 void peephole::optimize_cc_op2(alu_node* a) {
95 
96 	unsigned flags = a->bc.op_ptr->flags;
97 	unsigned cc = flags & AF_CC_MASK;
98 
99 	if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
100 		return;
101 
102 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
103 	unsigned dst_type = flags & AF_DST_TYPE_MASK;
104 
105 	int op_kind = (flags & AF_PRED) ? 1 :
106 			(flags & AF_SET) ? 2 :
107 			(flags & AF_KILL) ? 3 : 0;
108 
109 	bool swapped = false;
110 
111 	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
112 		std::swap(a->src[0],a->src[1]);
113 		swapped = true;
114 		// clear modifiers
115 		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
116 		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
117 	}
118 
119 	if (swapped || (a->src[1]->is_const() &&
120 			a->src[1]->literal_value == literal(0))) {
121 
122 		value *s = a->src[0];
123 
124 		bool_op_info bop = {};
125 
126 		PPH_DUMP(
127 			sblog << "cc_op2: ";
128 			dump::dump_op(a);
129 			sblog << "\n";
130 		);
131 
132 		if (!get_bool_op_info(s, bop))
133 			return;
134 
135 		if (cc == AF_CC_E)
136 			bop.invert = !bop.invert;
137 
138 		bool swap_args = false;
139 
140 		cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
141 
142 		if (bop.invert)
143 			cc = invert_setcc_condition(cc, swap_args);
144 
145 		if (bop.int_cvt) {
146 			assert(cmp_type != AF_FLOAT_CMP);
147 			cmp_type = AF_FLOAT_CMP;
148 		}
149 
150 		PPH_DUMP(
151 			sblog << "boi node: ";
152 			dump::dump_op(bop.n);
153 			sblog << " invert: " << bop.invert << "  int_cvt: " << bop.int_cvt;
154 			sblog <<"\n";
155 		);
156 
157 		unsigned newop;
158 
159 		switch(op_kind) {
160 		case 1:
161 			newop = get_predsetcc_op(cc, cmp_type);
162 			break;
163 		case 2:
164 			newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
165 			break;
166 		case 3:
167 			newop = get_killcc_op(cc, cmp_type);
168 			break;
169 		default:
170 			newop = ALU_OP0_NOP;
171 			assert(!"invalid op kind");
172 			break;
173 		}
174 
175 		a->bc.set_op(newop);
176 
177 		if (swap_args) {
178 			a->src[0] = bop.n->src[1];
179 			a->src[1] = bop.n->src[0];
180 			a->bc.src[0] = bop.n->bc.src[1];
181 			a->bc.src[1] = bop.n->bc.src[0];
182 
183 		} else {
184 			a->src[0] = bop.n->src[0];
185 			a->src[1] = bop.n->src[1];
186 			a->bc.src[0] = bop.n->bc.src[0];
187 			a->bc.src[1] = bop.n->bc.src[1];
188 		}
189 	}
190 }
191 
optimize_CNDcc_op(alu_node * a)192 void peephole::optimize_CNDcc_op(alu_node* a) {
193 	unsigned flags = a->bc.op_ptr->flags;
194 	unsigned cc = flags & AF_CC_MASK;
195 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
196 	bool swap = false;
197 
198 	if (cc == AF_CC_E) {
199 		swap = !swap;
200 		cc = AF_CC_NE;
201 	} else if (cc != AF_CC_NE)
202 		return;
203 
204 	value *s = a->src[0];
205 
206 	bool_op_info bop = {};
207 
208 	PPH_DUMP(
209 		sblog << "cndcc: ";
210 		dump::dump_op(a);
211 		sblog << "\n";
212 	);
213 
214 	if (!get_bool_op_info(s, bop))
215 		return;
216 
217 	alu_node *d = bop.n;
218 
219 	if (d->bc.omod)
220 		return;
221 
222 	PPH_DUMP(
223 		sblog << "cndcc def: ";
224 		dump::dump_op(d);
225 		sblog << "\n";
226 	);
227 
228 
229 	unsigned dflags = d->bc.op_ptr->flags;
230 	unsigned dcc = dflags & AF_CC_MASK;
231 	unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
232 	unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
233 	int nds;
234 
235 	// TODO we can handle some of these cases,
236 	// though probably this shouldn't happen
237 	if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
238 		return;
239 
240 	if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
241 		nds = 1;
242 	else if ((d->src[1]->is_const() &&
243 			d->src[1]->literal_value == literal(0)))
244 		nds = 0;
245 	else
246 		return;
247 
248 	// can't propagate ABS modifier to CNDcc because it's OP3
249 	if (d->bc.src[nds].abs)
250 		return;
251 
252 	// TODO we can handle some cases for uint comparison
253 	if (dcmp_type == AF_UINT_CMP)
254 		return;
255 
256 	if (dcc == AF_CC_NE) {
257 		dcc = AF_CC_E;
258 		swap = !swap;
259 	}
260 
261 	if (nds == 1) {
262 		switch (dcc) {
263 		case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
264 		case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
265 		default: break;
266 		}
267 	}
268 
269 	a->src[0] = d->src[nds];
270 	a->bc.src[0] = d->bc.src[nds];
271 
272 	if (swap) {
273 		std::swap(a->src[1], a->src[2]);
274 		std::swap(a->bc.src[1], a->bc.src[2]);
275 	}
276 
277 	a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
278 
279 }
280 
get_bool_flt_to_int_source(alu_node * & a)281 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
282 
283 	if (a->bc.op == ALU_OP1_FLT_TO_INT) {
284 
285 		if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
286 			return false;
287 
288 		value *s = a->src[0];
289 		if (!s || !s->def || !s->def->is_alu_inst())
290 			return false;
291 
292 		alu_node *dn = static_cast<alu_node*>(s->def);
293 
294 		if (dn->is_alu_op(ALU_OP1_TRUNC)) {
295 			s = dn->src[0];
296 			if (!s || !s->def || !s->def->is_alu_inst())
297 				return false;
298 
299 			if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
300 					dn->bc.src[0].rel != 0) {
301 				return false;
302 			}
303 
304 			dn = static_cast<alu_node*>(s->def);
305 
306 		}
307 
308 		if (dn->bc.op_ptr->flags & AF_SET) {
309 			a = dn;
310 			return true;
311 		}
312 	}
313 	return false;
314 }
315 
get_bool_op_info(value * b,bool_op_info & bop)316 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
317 
318 	node *d = b->def;
319 
320 	if (!d || !d->is_alu_inst())
321 		return false;
322 
323 	alu_node *dn = static_cast<alu_node*>(d);
324 
325 	if (dn->bc.op_ptr->flags & AF_SET) {
326 		bop.n = dn;
327 
328 		if (dn->bc.op_ptr->flags & AF_DX10)
329 			bop.int_cvt = true;
330 
331 		return true;
332 	}
333 
334 	if (get_bool_flt_to_int_source(dn)) {
335 		bop.n = dn;
336 		bop.int_cvt = true;
337 		return true;
338 	}
339 
340 	return false;
341 }
342 
343 } // namespace r600_sb
344