/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include <cmath>

#include "sb_shader.h"

namespace r600_sb {

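// If 'em' (an exec-mask value) is defined by a PRED_SETxx instruction, clone
// that instruction as a plain SETxx writing a fresh temp value, insert the
// clone after the original, and return the temp so the condition can be used
// as an ordinary select operand.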
value* get_select_value_for_em(shader& sh, value* em) {
	if (!em->def)
		return NULL;

	node *predset = em->def;
	if (!predset->is_pred_set())
		return NULL;

	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
	convert_predset_to_set(sh, s);

	predset->insert_after(s);

	value* &d0 = s->dst[0];
	d0 = sh.create_temp_value();
	d0->def = s;
	return d0;
}

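// Rewrite the ALU node in place as a single-source MOV of 'src' with the
// given abs/neg source modifiers.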
void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
	n.src.resize(1);
	n.src[0] = src;
	n.bc.src[0].abs = abs;
	n.bc.src[0].neg = neg;
	n.bc.set_op(ALU_OP1_MOV);
}

expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}

value * expr_handler::get_const(const literal &l) {
	value *v = sh.get_const_value(l);
	if (!v->gvn_source)
		vt.add_value(v);
	return v;
}

void expr_handler::assign_source(value *dst, value *src) {
	dst->gvn_source = src->gvn_source;
}

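// GVN equivalence test for two distinct values: equal if they already share a
// gvalue, if their defining instructions compute the same expression, or if
// they are matching relative accesses. LDS accesses are never merged.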
bool expr_handler::equal(value *l, value *r) {

	assert(l != r);

	if (l->is_lds_access() || r->is_lds_access())
		return false;
	if (l->gvalue() == r->gvalue())
		return true;

	if (l->def && r->def)
		return defs_equal(l, r);

	if (l->is_rel() && r->is_rel())
		return ivars_equal(l, r);

	return false;
}

bool expr_handler::ivars_equal(value* l, value* r) {
	if (l->rel->gvalue() == r->rel->gvalue()
			&& l->select == r->select) {

		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;

		// FIXME: replace this with more precise aliasing test
		return lv == rv;
	}
	return false;
}

bool expr_handler::defs_equal(value* l, value* r) {

	node *d1 = l->def;
	node *d2 = r->def;

	if (d1->type != d2->type || d1->subtype != d2->subtype)
		return false;

	if (d1->is_pred_set() || d2->is_pred_set())
		return false;

	if (d1->type == NT_OP) {
		switch (d1->subtype) {
		case NST_ALU_INST:
			return ops_equal(
					static_cast<alu_node*>(d1),
					static_cast<alu_node*>(d2));
//		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
//			static_cast<fetch_node*>(d2);
//		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
//			static_cast<cf_node*>(d2);
		default:
			break;
		}
	}
	return false;
}

bool expr_handler::try_fold(value* v) {
	assert(!v->gvn_source);

	if (v->def)
		try_fold(v->def);

	if (v->gvn_source)
		return true;

	return false;
}

bool expr_handler::try_fold(node* n) {
	return n->fold_dispatch(this);
}

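// Fold PHI/PSI nodes: if every incoming value is equivalent to the first one,
// the destination simply inherits that value's gvn_source.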
bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {

		value *s = n.src[0];

		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		if (s->is_sgpr())
			return false;

		for (vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			value *v = *I;
			if (!s->v_equal(v))
				return false;
		}

		assign_source(n.dst[0], s);
	} else {
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);

		value *s = n.src[2];
		assert(s->gvn_source);

		for (vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			value *v = *(I+2);
			if (!s->v_equal(v))
				return false;
		}
		assign_source(n.dst[0], s);
	}
	return true;
}

bool expr_handler::fold(container_node& n) {
	return false;
}

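// Try to fold a SETcc-style comparison: handles fully-constant operands and a
// few cases where one constant operand or identical operands make the result
// known, rewriting the node as a MOV of that result.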
bool expr_handler::fold_setcc(alu_node &n) {

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1 && n.dst[0]);

	unsigned flags = n.bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
	unsigned dst_type = flags & AF_DST_TYPE_MASK;

	bool cond_result;
	bool have_result = false;

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {
		cond_result = evaluate_condition(flags, cv0, cv1);
		have_result = true;
	} else if (isc1) {
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
				if (cv1.f < 0.0f && cc == AF_CC_NE) {
					cond_result = true;
					have_result = true;
				}
			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
				if (cv1.f > 0.0f && cc == AF_CC_E) {
					cond_result = false;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
			cond_result = true;
			have_result = true;
		}
	} else if (isc0) {
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
				if (cv0.f < 0.0f && (cc == AF_CC_E)) {
					cond_result = false;
					have_result = true;
				}
			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
				if (cv0.f > 0.0f && cc == AF_CC_NE) {
					cond_result = true;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
			cond_result = false;
			have_result = true;
		}
	} else if (v0 == v1) {
		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
			// NOTE can't handle float comparisons here because of NaNs
			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
			have_result = true;
		}
	}

	if (have_result) {
		literal result;

		if (cond_result)
			result = dst_type != AF_FLOAT_DST ?
					literal(0xFFFFFFFFu) : literal(1.0f);
		else
			result = literal(0);

		convert_to_mov(n, sh.get_const_value(result));
		return fold_alu_op1(n);
	}

	return false;
}

bool expr_handler::fold(alu_node& n) {

	switch (n.bc.op_ptr->src_count) {
	case 1: return fold_alu_op1(n);
	case 2: return fold_alu_op2(n);
	case 3: return fold_alu_op3(n);
	default:
		assert(0);
	}
	return false;
}

bool expr_handler::fold(fetch_node& n) {

	unsigned chan = 0;
	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
		value* &v = *I;
		if (v) {
			if (n.bc.dst_sel[chan] == SEL_0)
				assign_source(*I, get_const(0.0f));
			else if (n.bc.dst_sel[chan] == SEL_1)
				assign_source(*I, get_const(1.0f));
		}
		++chan;
	}
	return false;
}

bool expr_handler::fold(cf_node& n) {
	return false;
}

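// Constant-folding helpers for ALU modifiers: apply_alu_src_mod applies the
// abs/neg modifiers of the given source slot to a constant operand,
// apply_alu_dst_mod applies the output modifier (omod) and clamp to a folded
// result.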
void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
                                     literal &v) {
	const bc_alu_src &s = bc.src[src];

	if (s.abs)
		v = fabsf(v.f);
	if (s.neg)
		v = -v.f;
}

void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
	const float omod_coeff[] = {2.0f, 4.0f, 0.5f};

	if (bc.omod)
		v = v.f * omod_coeff[bc.omod - 1];
	if (bc.clamp)
		v = float_clamp(v.f);
}

bool expr_handler::args_equal(const vvec &l, const vvec &r) {

	assert(l.size() == r.size());

	int s = l.size();

	for (int k = 0; k < s; ++k) {
		if (!l[k]->v_equal(r[k]))
			return false;
	}

	return true;
}

bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
	const bc_alu &b0 = l->bc;
	const bc_alu &b1 = r->bc;

	if (b0.op != b1.op)
		return false;

	unsigned src_count = b0.op_ptr->src_count;

	if (b0.index_mode != b1.index_mode)
		return false;

	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
		return false;

	for (unsigned s = 0; s < src_count; ++s) {
		const bc_alu_src &s0 = b0.src[s];
		const bc_alu_src &s1 = b1.src[s];

		if (s0.abs != s1.abs || s0.neg != s1.neg)
			return false;
	}
	return args_equal(l->src, r->src);
}

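// Fold single-source ALU instructions: propagate plain MOV/MOVA copies, and
// evaluate the remaining op1 instructions on the CPU when the source is a
// constant.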
bool expr_handler::fold_alu_op1(alu_node& n) {

	assert(!n.src.empty());
	if (n.src.empty())
		return false;

	/* don't fold LDS instructions */
	if (n.bc.op_ptr->flags & AF_LDS)
		return false;

	value* v0 = n.src[0]->gvalue();

	if (v0->is_lds_oq() || v0->is_lds_access())
		return false;
	assert(v0 && n.dst[0]);

	if (!v0->is_const()) {
		// handle (MOV -(MOV -x)) => (MOV x)
		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[0].abs
				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
			alu_node *sd = static_cast<alu_node*>(v0->def);
			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
					sd->bc.src[0].neg) {
				n.src[0] = sd->src[0];
				n.bc.src[0].neg = 0;
				v0 = n.src[0]->gvalue();
			}
		}

		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
				n.bc.op == ALU_OP1_MOVA_GPR_INT)
				&& n.bc.clamp == 0 && n.bc.omod == 0
				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
				n.src.size() == 1 /* RIM/SIM can be appended as additional values */
				&& n.dst[0]->no_reladdr_conflict_with(v0)) {
			assign_source(n.dst[0], v0);
			return true;
		}
		return false;
	}

	literal dv, cv = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv);

	switch (n.bc.op) {
	case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
	case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
	case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
	case ALU_OP1_LOG_CLAMPED:
	case ALU_OP1_LOG_IEEE:
		if (cv.f != 0.0f)
			dv = log2f(cv.f);
		else
			// don't fold to NAN, let the GPU handle it for now
			// (prevents degenerate LIT tests from failing)
			return false;
		break;
	case ALU_OP1_MOV: dv = cv; break;
	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
//	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
//	case ALU_OP1_MOVA_GPR_INT:
	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
	case ALU_OP1_PRED_SET_INV:
		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
	case ALU_OP1_RECIPSQRT_CLAMPED:
	case ALU_OP1_RECIPSQRT_FF:
	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
	case ALU_OP1_RECIP_CLAMPED:
	case ALU_OP1_RECIP_FF:
	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
//	case ALU_OP1_RECIP_INT:
	case ALU_OP1_RECIP_UINT: {
		if (!cv.u)
			return false;
		dv.u = (1ull << 32) / cv.u;
		break;
	}
//	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
	case ALU_OP1_TRUNC: dv = truncf(cv.f); break;

	default:
		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

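// Try to merge an ADD with a MUL/MUL_IEEE operand into a single MULADD,
// e.g. (ADD (MUL a, b), c) => (MULADD a, b, c), provided no abs/omod/clamp
// modifiers interfere and not all three operands come from the kcache.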
bool expr_handler::fold_mul_add(alu_node *n) {

	bool ieee;
	value* v0 = n->src[0]->gvalue();

	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
			static_cast<alu_node*>(v0->def) : NULL;

	if (d0) {
		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d0->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
				!d0->bc.clamp && !n->bc.omod &&
				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
						!n->src[1]->is_kcache())) {

			bool mul_neg = n->bc.src[0].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[1];
			n->bc.src[2] = n->bc.src[1];
			n->src[0] = d0->src[0];
			n->bc.src[0] = d0->bc.src[0];
			n->src[1] = d0->src[1];
			n->bc.src[1] = d0->bc.src[1];

			n->bc.src[0].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	value* v1 = n->src[1]->gvalue();

	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
			static_cast<alu_node*>(v1->def) : NULL;

	if (d1) {
		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d1->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
				!d1->bc.clamp && !n->bc.omod &&
				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
						!n->src[0]->is_kcache())) {

			bool mul_neg = n->bc.src[1].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[0];
			n->bc.src[2] = n->bc.src[0];
			n->src[1] = d1->src[1];
			n->bc.src[1] = d1->bc.src[1];
			n->src[0] = d1->src[0];
			n->bc.src[0] = d1->bc.src[0];

			n->bc.src[1].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	return false;
}

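// Evaluate a two-source ALU opcode on constant operands; returns false for
// opcodes that are not handled here.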
bool expr_handler::eval_const_op(unsigned op, literal &r,
                                 literal cv0, literal cv1) {

	switch (op) {
	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
	case ALU_OP2_ADDC_UINT:
		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
	case ALU_OP2_BFM_INT:
		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
	case ALU_OP2_MAX:
	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
	case ALU_OP2_MIN:
	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
	case ALU_OP2_MULHI_INT:
		r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULHI_UINT:
		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULLO_INT:
		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_MULLO_UINT:
		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;

	default:
		return false;
	}

	return true;
}

// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
bool expr_handler::fold_assoc(alu_node *n) {

	alu_node *a = n;
	literal cr;

	int last_arg = -3;

	unsigned op = n->bc.op;
	bool allow_neg = false, cur_neg = false;
	bool distribute_neg = false;

	switch (op) {
	case ALU_OP2_ADD:
		distribute_neg = true;
		allow_neg = true;
		break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE:
		allow_neg = true;
		break;
	case ALU_OP3_MULADD:
		allow_neg = true;
		op = ALU_OP2_MUL;
		break;
	case ALU_OP3_MULADD_IEEE:
		allow_neg = true;
		op = ALU_OP2_MUL_IEEE;
		break;
	default:
		if (n->bc.op_ptr->src_count != 2)
			return false;
	}

	// check if we can evaluate the op
	if (!eval_const_op(op, cr, literal(0), literal(0)))
		return false;

	while (true) {

		value *v0 = a->src[0]->gvalue();
		value *v1 = a->src[1]->gvalue();

		last_arg = -2;

		if (v1->is_const()) {
			literal arg = v1->get_const_value();
			apply_alu_src_mod(a->bc, 1, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v0->def) {
				alu_node *d0 = static_cast<alu_node*>(v0->def);
				if ((d0->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d0->is_alu_op(ALU_OP2_MUL))) &&
						!d0->bc.omod && !d0->bc.clamp &&
						!a->bc.src[0].abs &&
						(!a->bc.src[0].neg || allow_neg)) {
					cur_neg ^= a->bc.src[0].neg;
					a = d0;
					continue;
				}
			}
			last_arg = 0;

		}

		if (v0->is_const()) {
			literal arg = v0->get_const_value();
			apply_alu_src_mod(a->bc, 0, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			if (last_arg == 0) {
				eval_const_op(op, cr, cr, arg);
				last_arg = -1;
				break;
			}

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v1->def) {
				alu_node *d1 = static_cast<alu_node*>(v1->def);
				if ((d1->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d1->is_alu_op(ALU_OP2_MUL))) &&
						!d1->bc.omod && !d1->bc.clamp &&
						!a->bc.src[1].abs &&
						(!a->bc.src[1].neg || allow_neg)) {
					cur_neg ^= a->bc.src[1].neg;
					a = d1;
					continue;
				}
			}

			last_arg = 1;
		}

		break;
	}

	if (last_arg == -1) {
		// result is const
		apply_alu_dst_mod(n->bc, cr);

		if (n->bc.op == op) {
			convert_to_mov(*n, sh.get_const_value(cr));
			fold_alu_op1(*n);
			return true;
		} else { // MULADD => ADD
			n->src[0] = n->src[2];
			n->bc.src[0] = n->bc.src[2];
			n->src[1] = sh.get_const_value(cr);
			n->bc.src[1].clear();

			n->src.resize(2);
			n->bc.set_op(ALU_OP2_ADD);
		}
	} else if (last_arg >= 0) {
		n->src[0] = a->src[last_arg];
		n->bc.src[0] = a->bc.src[last_arg];
		n->bc.src[0].neg ^= cur_neg;
		n->src[1] = sh.get_const_value(cr);
		n->bc.src[1].clear();
	}

	return false;
}

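// Fold two-source ALU instructions: comparisons go through fold_setcc,
// associative chains through fold_assoc; then algebraic identities on equal
// or constant operands (x+x, x+(-x), x*1, x+0, x&0, ...) are applied, and
// fully-constant operands are evaluated with eval_const_op.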
bool expr_handler::fold_alu_op2(alu_node& n) {

	if (n.src.size() < 2)
		return false;

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_SET) {
		return fold_setcc(n);
	}

	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1);

	// handle some operations with equal args, e.g. x + x => x * 2
	if (v0 == v1) {
		if (n.bc.src[0].neg == n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
			case ALU_OP2_MIN_DX10:
			case ALU_OP2_MAX:
			case ALU_OP2_MAX_DX10:
				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
				if (!sh.safe_math) {
					n.src[1] = sh.get_const_value(2.0f);
					n.bc.src[1].clear();
					n.bc.set_op(ALU_OP2_MUL);
					return fold_alu_op2(n);
				}
				break;
			}
		}
		if (n.bc.src[0].neg != n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:  // (ADD x, -x) => (MOV 0)
				if (!sh.safe_math) {
					convert_to_mov(n, sh.get_const_value(literal(0)));
					return fold_alu_op1(n);
				}
				break;
			}
		}
	}

	if (n.bc.op == ALU_OP2_ADD) {
		if (fold_mul_add(&n))
			return true;
	}

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	if (!isc0 && !isc1)
		return false;

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {

		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
			return false;

	} else { // one source is const

		if (isc0 && cv0 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_SUB_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc0 && cv0 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
                                      literal s1, literal s2) {

	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
	unsigned cc = alu_cnd_flags & AF_CC_MASK;

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.f == s2.f;
		case AF_CC_GT: return s1.f >  s2.f;
		case AF_CC_GE: return s1.f >= s2.f;
		case AF_CC_NE: return s1.f != s2.f;
		case AF_CC_LT: return s1.f <  s2.f;
		case AF_CC_LE: return s1.f <= s2.f;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.i == s2.i;
		case AF_CC_GT: return s1.i >  s2.i;
		case AF_CC_GE: return s1.i >= s2.i;
		case AF_CC_NE: return s1.i != s2.i;
		case AF_CC_LT: return s1.i <  s2.i;
		case AF_CC_LE: return s1.i <= s2.i;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.u == s2.u;
		case AF_CC_GT: return s1.u >  s2.u;
		case AF_CC_GE: return s1.u >= s2.u;
		case AF_CC_NE: return s1.u != s2.u;
		case AF_CC_LT: return s1.u <  s2.u;
		case AF_CC_LE: return s1.u <= s2.u;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	default:
		assert(!"invalid cmp_type");
		return false;
	}
}

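// Fold three-source ALU instructions: resolves CMOV-style selects with a
// known condition or identical operands, simplifies MULADD with constant or
// shared multipliers into two-source ops, and evaluates fully-constant
// MULADDs.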
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
		return false;

	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
		if (n.src.size() < 3)
			return fold_alu_op2(n);
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();
	value* v2 = n.src[2]->gvalue();

	/* LDS instructions look like op3 with no dst - don't fold. */
	if (!n.dst[0])
		return false;
	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	literal dv, cv0, cv1, cv2;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc2) {
		cv2 = v2->get_const_value();
		apply_alu_src_mod(n.bc, 2, cv2);
	}

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_CMOV) {
		int src = 0;

		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
			// result doesn't depend on condition, convert to MOV
			src = 1;
		} else if (isc0) {
			// src0 is const, condition can be evaluated, convert to MOV
			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
					AF_CMP_TYPE_MASK), cv0, literal(0));
			src = cond ? 1 : 2;
		}

		if (src) {
			// if src is selected, convert to MOV
			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
			return fold_alu_op1(n);
		}
	}

	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
			n.bc.op == ALU_OP3_MULADD_IEEE)) {

		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {

			alu_node *md = static_cast<alu_node*>(v2->def);
			value *mv0 = md->src[0]->gvalue();
			value *mv1 = md->src[1]->gvalue();

			int es0 = -1, es1 = -1;

			if (v0 == mv0) {
				es0 = 0;
				es1 = 0;
			} else if (v0 == mv1) {
				es0 = 0;
				es1 = 1;
			} else if (v1 == mv0) {
				es0 = 1;
				es1 = 0;
			} else if (v1 == mv1) {
				es0 = 1;
				es1 = 1;
			}

			value *va0 = es0 == 0 ? v1 : v0;
			value *va1 = es1 == 0 ? mv1 : mv0;

			/* Don't fold if no equal multipliers were found.
			 * Also don't fold if the operands of the ADD to be created are
			 * both relatively accessed with different AR values, because that
			 * would create impossible code.
			 */
			if (es0 != -1 &&
			    (!va0->is_rel() || !va1->is_rel() ||
			     (va0->rel == va1->rel))) {

				alu_node *add = sh.create_alu();
				add->bc.set_op(ALU_OP2_ADD);

				add->dst.resize(1);
				add->src.resize(2);

				value *t = sh.create_temp_value();
				t->def = add;
				add->dst[0] = t;
				add->src[0] = va0;
				add->src[1] = va1;
				add->bc.src[0] = n.bc.src[!es0];
				add->bc.src[1] = md->bc.src[!es1];

				add->bc.src[1].neg ^= n.bc.src[2].neg ^
						(n.bc.src[es0].neg != md->bc.src[es1].neg);

				n.insert_before(add);
				vt.add_value(t);

				t = t->gvalue();

				if (es0 == 1) {
					n.src[0] = n.src[1];
					n.bc.src[0] = n.bc.src[1];
				}

				n.src[1] = t;
				n.bc.src[1].clear();

				n.src.resize(2);

				n.bc.set_op(op);
				return fold_alu_op2(n);
			}
		}
	}

	if (!isc0 && !isc1 && !isc2)
		return false;

	if (isc0 && isc1 && isc2) {
		switch (n.bc.op) {
		case ALU_OP3_MULADD_IEEE:
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;

		// TODO

		default:
			return false;
		}
	} else {
		if (isc0 && isc1) {
			switch (n.bc.op) {
			case ALU_OP3_MULADD:
			case ALU_OP3_MULADD_IEEE:
				dv = cv0.f * cv1.f;
				n.bc.set_op(ALU_OP2_ADD);
				n.src[0] = sh.get_const_value(dv);
				n.bc.src[0].clear();
				n.src[1] = n.src[2];
				n.bc.src[1] = n.bc.src[2];
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD) {
			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
				convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
				return fold_alu_op1(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

			if (isc1 && v0 == v2) {
				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
				n.src[1] = sh.get_const_value(cv1);
				n.bc.src[1].neg = 0;
				n.bc.src[1].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			} else if (isc0 && v1 == v2) {
				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
				n.src[0] = sh.get_const_value(cv0);
				n.bc.src[0].neg = 0;
				n.bc.src[0].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

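// Helpers mapping a condition code and comparison type to concrete opcodes:
// invert_setcc_condition negates a condition (possibly requiring the operands
// to be swapped), and the get_*cc_op functions return the matching SETcc,
// PRED_SETcc, KILLcc and CNDcc instructions.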
unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
	unsigned ncc = 0;

	switch (cc) {
	case AF_CC_E: ncc = AF_CC_NE; break;
	case AF_CC_NE: ncc = AF_CC_E; break;
	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
	default:
		assert(!"unexpected condition code");
		break;
	}
	return ncc;
}

unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {

	if (int_dst && cmp_type == AF_FLOAT_CMP) {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_SETE_DX10;
		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
		}
	} else {

		switch (cmp_type) {
		case AF_FLOAT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE;
			case AF_CC_NE: return ALU_OP2_SETNE;
			case AF_CC_GT: return ALU_OP2_SETGT;
			case AF_CC_GE: return ALU_OP2_SETGE;
			}
			break;
		}
		case AF_INT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE_INT;
			case AF_CC_NE: return ALU_OP2_SETNE_INT;
			case AF_CC_GT: return ALU_OP2_SETGT_INT;
			case AF_CC_GE: return ALU_OP2_SETGE_INT;
			}
			break;
		}
		case AF_UINT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE_INT;
			case AF_CC_NE: return ALU_OP2_SETNE_INT;
			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
			}
			break;
		}
		}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
		}
		break;
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE;
		case AF_CC_NE: return ALU_OP2_KILLNE;
		case AF_CC_GT: return ALU_OP2_KILLGT;
		case AF_CC_GE: return ALU_OP2_KILLGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE_INT;
		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
		}
		break;
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE_INT;
		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP3_CNDE;
		case AF_CC_GT: return ALU_OP3_CNDGT;
		case AF_CC_GE: return ALU_OP3_CNDGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP3_CNDE_INT;
		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

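// Rewrite a PRED_SETxx node in place as the corresponding SETxx_DX10 op with
// the inverted condition (swapping operands when needed) and drop the
// exec-mask/predicate updates, so the result can be used as ordinary data.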
void convert_predset_to_set(shader& sh, alu_node* a) {

	unsigned flags = a->bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;

	bool swap_args = false;

	cc = invert_setcc_condition(cc, swap_args);

	unsigned newop = get_setcc_op(cc, cmp_type, true);

	a->dst.resize(1);
	a->bc.set_op(newop);

	if (swap_args) {
		std::swap(a->src[0], a->src[1]);
		std::swap(a->bc.src[0], a->bc.src[1]);
	}

	a->bc.update_exec_mask = 0;
	a->bc.update_pred = 0;
}

} // namespace r600_sb