/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include <cmath>

#include "sb_shader.h"

namespace r600_sb {

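// Returns a value that can be used in place of the exec-mask value 'em':
// the defining PRED_SETxx node is cloned, converted to the matching SETxx
// op, and its result is written to a fresh temp value.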
value* get_select_value_for_em(shader& sh, value* em) {
	if (!em->def)
		return NULL;

	node *predset = em->def;
	if (!predset->is_pred_set())
		return NULL;

	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
	convert_predset_to_set(sh, s);

	predset->insert_after(s);

	value* &d0 = s->dst[0];
	d0 = sh.create_temp_value();
	d0->def = s;
	return d0;
}

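// Rewrites the ALU node in place as a single-source MOV of 'src' with the
// given neg/abs source modifiers.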
void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
	n.src.resize(1);
	n.src[0] = src;
	n.bc.src[0].abs = abs;
	n.bc.src[0].neg = neg;
	n.bc.set_op(ALU_OP1_MOV);
}

expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}

value * expr_handler::get_const(const literal &l) {
	value *v = sh.get_const_value(l);
	if (!v->gvn_source)
		vt.add_value(v);
	return v;
}

void expr_handler::assign_source(value *dst, value *src) {
	dst->gvn_source = src->gvn_source;
}

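// Value equivalence test used for value numbering: values match if they
// share a gvalue, have equivalent defining ops, or are matching relative
// (indirect) accesses.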
bool expr_handler::equal(value *l, value *r) {

	assert(l != r);

	if (l->gvalue() == r->gvalue())
		return true;

	if (l->def && r->def)
		return defs_equal(l, r);

	if (l->is_rel() && r->is_rel())
		return ivars_equal(l, r);

	return false;
}

bool expr_handler::ivars_equal(value* l, value* r) {
	if (l->rel->gvalue() == r->rel->gvalue()
			&& l->select == r->select) {

		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;

		// FIXME: replace this with more precise aliasing test
		return lv == rv;
	}
	return false;
}

bool expr_handler::defs_equal(value* l, value* r) {

	node *d1 = l->def;
	node *d2 = r->def;

	if (d1->type != d2->type || d1->subtype != d2->subtype)
		return false;

	if (d1->is_pred_set() || d2->is_pred_set())
		return false;

	if (d1->type == NT_OP) {
		switch (d1->subtype) {
		case NST_ALU_INST:
			return ops_equal(
					static_cast<alu_node*>(d1),
					static_cast<alu_node*>(d2));
//		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
//			static_cast<fetch_node*>(d2);
//		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
//			static_cast<cf_node*>(d2);
		default:
			break;
		}
	}
	return false;
}

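// Tries to fold the definition of 'v'; returns true if folding assigned a
// gvn_source for it.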
bool expr_handler::try_fold(value* v) {
	assert(!v->gvn_source);

	if (v->def)
		try_fold(v->def);

	if (v->gvn_source)
		return true;

	return false;
}

bool expr_handler::try_fold(node* n) {
	return n->fold_dispatch(this);
}

bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {

		value *s = n.src[0];

		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		if (s->is_sgpr())
			return false;

		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			value *v = *I;
			if (!s->v_equal(v))
				return false;
		}

		assign_source(n.dst[0], s);
	} else {
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);

		value *s = n.src[2];
		assert(s->gvn_source);

		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			value *v = *(I+2);
			if (!s->v_equal(v))
				return false;
		}
		assign_source(n.dst[0], s);
	}
	return true;
}

bool expr_handler::fold(container_node& n) {
	return false;
}

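// Folds SETcc-type comparisons: evaluates the condition when both sources
// are constants, and handles some cases where only one source is constant
// (using abs/neg modifiers) or both sources are the same value.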
bool expr_handler::fold_setcc(alu_node &n) {

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1 && n.dst[0]);

	unsigned flags = n.bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
	unsigned dst_type = flags & AF_DST_TYPE_MASK;

	bool cond_result;
	bool have_result = false;

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {
		cond_result = evaluate_condition(flags, cv0, cv1);
		have_result = true;
	} else if (isc1) {
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
				if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
					cond_result = true;
					have_result = true;
				} else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
					cond_result = true;
					have_result = true;
				}
			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
				if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
					cond_result = false;
					have_result = true;
				} else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
					cond_result = false;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
			cond_result = true;
			have_result = true;
		}
	} else if (isc0) {
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
				if (cv0.f <= 0.0f && cc == AF_CC_GT) {
					cond_result = false;
					have_result = true;
				} else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
					cond_result = false;
					have_result = true;
				}
			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
				if (cv0.f >= 0.0f && cc == AF_CC_GE) {
					cond_result = true;
					have_result = true;
				} else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
					cond_result = true;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
			cond_result = false;
			have_result = true;
		}
	} else if (v0 == v1) {
		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
			// NOTE can't handle float comparisons here because of NaNs
			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
			have_result = true;
		}
	}

	if (have_result) {
		literal result;

		if (cond_result)
			result = dst_type != AF_FLOAT_DST ?
					literal(0xFFFFFFFFu) : literal(1.0f);
		else
			result = literal(0);

		convert_to_mov(n, sh.get_const_value(result));
		return fold_alu_op1(n);
	}

	return false;
}

bool expr_handler::fold(alu_node& n) {

	switch (n.bc.op_ptr->src_count) {
	case 1: return fold_alu_op1(n);
	case 2: return fold_alu_op2(n);
	case 3: return fold_alu_op3(n);
	default:
		assert(0);
	}
	return false;
}

bool expr_handler::fold(fetch_node& n) {

	unsigned chan = 0;
	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
		value* &v = *I;
		if (v) {
			if (n.bc.dst_sel[chan] == SEL_0)
				assign_source(*I, get_const(0.0f));
			else if (n.bc.dst_sel[chan] == SEL_1)
				assign_source(*I, get_const(1.0f));
		}
		++chan;
	}
	return false;
}

bool expr_handler::fold(cf_node& n) {
	return false;
}

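// Applies the abs/neg modifiers of the given source slot to a constant
// operand (float interpretation).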
void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
                                     literal &v) {
	const bc_alu_src &s = bc.src[src];

	if (s.abs)
		v = fabs(v.f);
	if (s.neg)
		v = -v.f;
}

void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
	float omod_coeff[] = {2.0f, 4.0f, 0.5f};

	if (bc.omod)
		v = v.f * omod_coeff[bc.omod - 1];
	if (bc.clamp)
		v = float_clamp(v.f);
}

bool expr_handler::args_equal(const vvec &l, const vvec &r) {

	assert(l.size() == r.size());

	int s = l.size();

	for (int k = 0; k < s; ++k) {
		if (!l[k]->v_equal(r[k]))
			return false;
	}

	return true;
}

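// Two ALU nodes are equivalent if opcode, index mode, clamp/omod and all
// per-source modifiers match and their source values are equal.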
bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
	const bc_alu &b0 = l->bc;
	const bc_alu &b1 = r->bc;

	if (b0.op != b1.op)
		return false;

	unsigned src_count = b0.op_ptr->src_count;

	if (b0.index_mode != b1.index_mode)
		return false;

	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
		return false;

	for (unsigned s = 0; s < src_count; ++s) {
		const bc_alu_src &s0 = b0.src[s];
		const bc_alu_src &s1 = b1.src[s];

		if (s0.abs != s1.abs || s0.neg != s1.neg)
			return false;
	}
	return args_equal(l->src, r->src);
}

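// Folds single-source ALU ops: copy propagation for plain MOVs and full
// constant evaluation when the source is constant.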
bool expr_handler::fold_alu_op1(alu_node& n) {

	assert(!n.src.empty());
	if (n.src.empty())
		return false;

	value* v0 = n.src[0]->gvalue();

	assert(v0 && n.dst[0]);

	if (!v0->is_const()) {
		// handle (MOV -(MOV -x)) => (MOV x)
		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
			alu_node *sd = static_cast<alu_node*>(v0->def);
			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
					sd->bc.src[0].neg) {
				n.src[0] = sd->src[0];
				n.bc.src[0].neg = 0;
				v0 = n.src[0]->gvalue();
			}
		}

		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
				n.bc.op == ALU_OP1_MOVA_GPR_INT)
				&& n.bc.clamp == 0 && n.bc.omod == 0
				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
				n.src.size() == 1 /* RIM/SIM can be appended as additional values */) {
			assign_source(n.dst[0], v0);
			return true;
		}
		return false;
	}

	literal dv, cv = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv);

	switch (n.bc.op) {
	case ALU_OP1_CEIL: dv = ceil(cv.f); break;
	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
	case ALU_OP1_FLOOR: dv = floor(cv.f); break;
	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
	case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
	case ALU_OP1_LOG_CLAMPED:
	case ALU_OP1_LOG_IEEE:
		if (cv.f != 0.0f)
			dv = log2(cv.f);
		else
			// don't fold to NAN, let the GPU handle it for now
			// (prevents degenerate LIT tests from failing)
			return false;
		break;
	case ALU_OP1_MOV: dv = cv; break;
	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
//	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
//	case ALU_OP1_MOVA_GPR_INT:
	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
	case ALU_OP1_PRED_SET_INV:
		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
	case ALU_OP1_RECIPSQRT_CLAMPED:
	case ALU_OP1_RECIPSQRT_FF:
	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
	case ALU_OP1_RECIP_CLAMPED:
	case ALU_OP1_RECIP_FF:
	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
//	case ALU_OP1_RECIP_INT:
	case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
//	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
	case ALU_OP1_TRUNC: dv = trunc(cv.f); break;

	default:
		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

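// Tries to combine (ADD (MUL a, b), c) into MULADD when the source
// modifiers and kcache constraints allow it.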
bool expr_handler::fold_mul_add(alu_node *n) {

	bool ieee;
	value* v0 = n->src[0]->gvalue();

	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
			static_cast<alu_node*>(v0->def) : NULL;

	if (d0) {
		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d0->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
				!d0->bc.clamp && !n->bc.omod &&
				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
						!n->src[1]->is_kcache())) {

			bool mul_neg = n->bc.src[0].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[1];
			n->bc.src[2] = n->bc.src[1];
			n->src[0] = d0->src[0];
			n->bc.src[0] = d0->bc.src[0];
			n->src[1] = d0->src[1];
			n->bc.src[1] = d0->bc.src[1];

			n->bc.src[0].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	value* v1 = n->src[1]->gvalue();

	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
			static_cast<alu_node*>(v1->def) : NULL;

	if (d1) {
		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d1->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
				!d1->bc.clamp && !n->bc.omod &&
				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
						!n->src[0]->is_kcache())) {

			bool mul_neg = n->bc.src[1].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[0];
			n->bc.src[2] = n->bc.src[0];
			n->src[1] = d1->src[1];
			n->bc.src[1] = d1->bc.src[1];
			n->src[0] = d1->src[0];
			n->bc.src[0] = d1->bc.src[0];

			n->bc.src[1].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	return false;
}

bool expr_handler::eval_const_op(unsigned op, literal &r,
                                 literal cv0, literal cv1) {

	switch (op) {
	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
	case ALU_OP2_ADDC_UINT:
		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
	case ALU_OP2_BFM_INT:
		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
	case ALU_OP2_MAX:
	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
	case ALU_OP2_MIN:
	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
	case ALU_OP2_MULHI_INT:
		r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULHI_UINT:
		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULLO_INT:
		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_MULLO_UINT:
		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;

	default:
		return false;
	}

	return true;
}

// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
bool expr_handler::fold_assoc(alu_node *n) {

	alu_node *a = n;
	literal cr;

	int last_arg = -3;

	unsigned op = n->bc.op;
	bool allow_neg = false, cur_neg = false;
	bool distribute_neg = false;

	switch(op) {
	case ALU_OP2_ADD:
		distribute_neg = true;
		allow_neg = true;
		break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE:
		allow_neg = true;
		break;
	case ALU_OP3_MULADD:
		allow_neg = true;
		op = ALU_OP2_MUL;
		break;
	case ALU_OP3_MULADD_IEEE:
		allow_neg = true;
		op = ALU_OP2_MUL_IEEE;
		break;
	default:
		if (n->bc.op_ptr->src_count != 2)
			return false;
	}

	// check if we can evaluate the op
	if (!eval_const_op(op, cr, literal(0), literal(0)))
		return false;

	while (true) {

		value *v0 = a->src[0]->gvalue();
		value *v1 = a->src[1]->gvalue();

		last_arg = -2;

		if (v1->is_const()) {
			literal arg = v1->get_const_value();
			apply_alu_src_mod(a->bc, 1, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v0->def) {
				alu_node *d0 = static_cast<alu_node*>(v0->def);
				if ((d0->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d0->is_alu_op(ALU_OP2_MUL))) &&
						!d0->bc.omod && !d0->bc.clamp &&
						!a->bc.src[0].abs &&
						(!a->bc.src[0].neg || allow_neg)) {
					cur_neg ^= a->bc.src[0].neg;
					a = d0;
					continue;
				}
			}
			last_arg = 0;

		}

		if (v0->is_const()) {
			literal arg = v0->get_const_value();
			apply_alu_src_mod(a->bc, 0, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			if (last_arg == 0) {
				eval_const_op(op, cr, cr, arg);
				last_arg = -1;
				break;
			}

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v1->def) {
				alu_node *d1 = static_cast<alu_node*>(v1->def);
				if ((d1->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d1->is_alu_op(ALU_OP2_MUL))) &&
						!d1->bc.omod && !d1->bc.clamp &&
						!a->bc.src[1].abs &&
						(!a->bc.src[1].neg || allow_neg)) {
					cur_neg ^= a->bc.src[1].neg;
					a = d1;
					continue;
				}
			}

			last_arg = 1;
		}

		break;
	};

	if (last_arg == -1) {
		// result is const
		apply_alu_dst_mod(n->bc, cr);

		if (n->bc.op == op) {
			convert_to_mov(*n, sh.get_const_value(cr));
			fold_alu_op1(*n);
			return true;
		} else { // MULADD => ADD
			n->src[0] = n->src[2];
			n->bc.src[0] = n->bc.src[2];
			n->src[1] = sh.get_const_value(cr);
			memset(&n->bc.src[1], 0, sizeof(bc_alu_src));

			n->src.resize(2);
			n->bc.set_op(ALU_OP2_ADD);
		}
	} else if (last_arg >= 0) {
		n->src[0] = a->src[last_arg];
		n->bc.src[0] = a->bc.src[last_arg];
		n->bc.src[0].neg ^= cur_neg;
		n->src[1] = sh.get_const_value(cr);
		memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
	}

	return false;
}

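// Folds two-source ALU ops: comparisons, associative chains, identities for
// equal or constant arguments, and full constant evaluation.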
bool expr_handler::fold_alu_op2(alu_node& n) {

	if (n.src.size() < 2)
		return false;

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_SET) {
		return fold_setcc(n);
	}

	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1);

	// handle some operations with equal args, e.g. x + x => x * 2
	if (v0 == v1) {
		if (n.bc.src[0].neg == n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
			case ALU_OP2_MAX:
				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
				if (!sh.safe_math) {
					n.src[1] = sh.get_const_value(2.0f);
					memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
					n.bc.set_op(ALU_OP2_MUL);
					return fold_alu_op2(n);
				}
				break;
			}
		}
		if (n.bc.src[0].neg != n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:  // (ADD x, -x) => (MOV 0)
				if (!sh.safe_math) {
					convert_to_mov(n, sh.get_const_value(literal(0)));
					return fold_alu_op1(n);
				}
				break;
			}
		}
	}

	if (n.bc.op == ALU_OP2_ADD) {
		if (fold_mul_add(&n))
			return true;
	}

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	if (!isc0 && !isc1)
		return false;

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {

		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
			return false;

	} else { // one source is const

		if (isc0 && cv0 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_SUB_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc0 && cv0 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

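// Evaluates the comparison described by the condition-code and compare-type
// flags on two constant operands.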
bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
                                      literal s1, literal s2) {

	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
	unsigned cc = alu_cnd_flags & AF_CC_MASK;

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.f == s2.f;
		case AF_CC_GT: return s1.f >  s2.f;
		case AF_CC_GE: return s1.f >= s2.f;
		case AF_CC_NE: return s1.f != s2.f;
		case AF_CC_LT: return s1.f <  s2.f;
		case AF_CC_LE: return s1.f <= s2.f;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.i == s2.i;
		case AF_CC_GT: return s1.i >  s2.i;
		case AF_CC_GE: return s1.i >= s2.i;
		case AF_CC_NE: return s1.i != s2.i;
		case AF_CC_LT: return s1.i <  s2.i;
		case AF_CC_LE: return s1.i <= s2.i;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.u == s2.u;
		case AF_CC_GT: return s1.u >  s2.u;
		case AF_CC_GE: return s1.u >= s2.u;
		case AF_CC_NE: return s1.u != s2.u;
		case AF_CC_LT: return s1.u <  s2.u;
		case AF_CC_LE: return s1.u <= s2.u;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	default:
		assert(!"invalid cmp_type");
		return false;
	}
}

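// Folds three-source ALU ops: CMOV-type ops with a known condition and
// MULADD variants with constant or repeated operands.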
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
		return false;

	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();
	value* v2 = n.src[2]->gvalue();

	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	literal dv, cv0, cv1, cv2;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc2) {
		cv2 = v2->get_const_value();
		apply_alu_src_mod(n.bc, 2, cv2);
	}

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_CMOV) {
		int src = 0;

		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
			// result doesn't depend on condition, convert to MOV
			src = 1;
		} else if (isc0) {
			// src0 is const, condition can be evaluated, convert to MOV
			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
					AF_CMP_TYPE_MASK), cv0, literal(0));
			src = cond ? 1 : 2;
		}

		if (src) {
			// if src is selected, convert to MOV
			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
			return fold_alu_op1(n);
		}
	}

	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
			n.bc.op == ALU_OP3_MULADD_IEEE)) {

		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {

			alu_node *md = static_cast<alu_node*>(v2->def);
			value *mv0 = md->src[0]->gvalue();
			value *mv1 = md->src[1]->gvalue();

			int es0 = -1, es1;

			if (v0 == mv0) {
				es0 = 0;
				es1 = 0;
			} else if (v0 == mv1) {
				es0 = 0;
				es1 = 1;
			} else if (v1 == mv0) {
				es0 = 1;
				es1 = 0;
			} else if (v1 == mv1) {
				es0 = 1;
				es1 = 1;
			}

			if (es0 != -1) {
				value *va0 = es0 == 0 ? v1 : v0;
				value *va1 = es1 == 0 ? mv1 : mv0;

				alu_node *add = sh.create_alu();
				add->bc.set_op(ALU_OP2_ADD);

				add->dst.resize(1);
				add->src.resize(2);

				value *t = sh.create_temp_value();
				t->def = add;
				add->dst[0] = t;
				add->src[0] = va0;
				add->src[1] = va1;
				add->bc.src[0] = n.bc.src[!es0];
				add->bc.src[1] = md->bc.src[!es1];

				add->bc.src[1].neg ^= n.bc.src[2].neg ^
						(n.bc.src[es0].neg != md->bc.src[es1].neg);

				n.insert_before(add);
				vt.add_value(t);

				t = t->gvalue();

				if (es0 == 1) {
					n.src[0] = n.src[1];
					n.bc.src[0] = n.bc.src[1];
				}

				n.src[1] = t;
				memset(&n.bc.src[1], 0, sizeof(bc_alu_src));

				n.src.resize(2);

				n.bc.set_op(op);
				return fold_alu_op2(n);
			}
		}
	}

	if (!isc0 && !isc1 && !isc2)
		return false;

	if (isc0 && isc1 && isc2) {
		switch (n.bc.op) {
		case ALU_OP3_MULADD_IEEE:
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;

		// TODO

		default:
			return false;
		}
	} else {
		if (isc0 && isc1) {
			switch (n.bc.op) {
			case ALU_OP3_MULADD:
			case ALU_OP3_MULADD_IEEE:
				dv = cv0.f * cv1.f;
				n.bc.set_op(ALU_OP2_ADD);
				n.src[0] = sh.get_const_value(dv);
				memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
				n.src[1] = n.src[2];
				n.bc.src[1] = n.bc.src[2];
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD) {
			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
				convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
				return fold_alu_op1(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

			if (isc1 && v0 == v2) {
				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
				n.src[1] = sh.get_const_value(cv1);
				n.bc.src[1].neg = 0;
				n.bc.src[1].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			} else if (isc0 && v1 == v2) {
				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
				n.src[0] = sh.get_const_value(cv0);
				n.bc.src[0].neg = 0;
				n.bc.src[0].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
	unsigned ncc = 0;

	switch (cc) {
	case AF_CC_E: ncc = AF_CC_NE; break;
	case AF_CC_NE: ncc = AF_CC_E; break;
	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
	default:
		assert(!"unexpected condition code");
		break;
	}
	return ncc;
}

unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {

	if (int_dst && cmp_type == AF_FLOAT_CMP) {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_SETE_DX10;
		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
		}
	} else {

		switch(cmp_type) {
		case AF_FLOAT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE;
			case AF_CC_NE: return ALU_OP2_SETNE;
			case AF_CC_GT: return ALU_OP2_SETGT;
			case AF_CC_GE: return ALU_OP2_SETGE;
			}
			break;
		}
		case AF_INT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE_INT;
			case AF_CC_NE: return ALU_OP2_SETNE_INT;
			case AF_CC_GT: return ALU_OP2_SETGT_INT;
			case AF_CC_GE: return ALU_OP2_SETGE_INT;
			}
			break;
		}
		case AF_UINT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE_INT;
			case AF_CC_NE: return ALU_OP2_SETNE_INT;
			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
			}
			break;
		}
		}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {

	switch(cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
		}
		break;
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {

	switch(cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE;
		case AF_CC_NE: return ALU_OP2_KILLNE;
		case AF_CC_GT: return ALU_OP2_KILLGT;
		case AF_CC_GE: return ALU_OP2_KILLGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE_INT;
		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
		}
		break;
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE_INT;
		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {

	switch(cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP3_CNDE;
		case AF_CC_GT: return ALU_OP3_CNDGT;
		case AF_CC_GE: return ALU_OP3_CNDGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP3_CNDE_INT;
		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

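// Rewrites a PRED_SETcc node as the corresponding SETcc_DX10 op with the
// inverted condition (swapping the sources for GE/GT) and clears the
// exec-mask/predicate update flags.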
void convert_predset_to_set(shader& sh, alu_node* a) {

	unsigned flags = a->bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;

	bool swap_args = false;

	cc = invert_setcc_condition(cc, swap_args);

	unsigned newop = get_setcc_op(cc, cmp_type, true);

	a->dst.resize(1);
	a->bc.set_op(newop);

	if (swap_args) {
		std::swap(a->src[0], a->src[1]);
		std::swap(a->bc.src[0], a->bc.src[1]);
	}

	a->bc.update_exec_mask = 0;
	a->bc.update_pred = 0;
}

} // namespace r600_sb