• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * on the rights to use, copy, modify, merge, publish, distribute, sub
8   * license, and/or sell copies of the Software, and to permit persons to whom
9   * the Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice (including the next
12   * paragraph) shall be included in all copies or substantial portions of the
13   * Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21   * USE OR OTHER DEALINGS IN THE SOFTWARE.
22   *
23   * Authors:
24   *      Vadim Girlin
25   */
26  
27  #include <cmath>
28  
29  #include "sb_shader.h"
30  
31  namespace r600_sb {
32  
get_select_value_for_em(shader & sh,value * em)33  value* get_select_value_for_em(shader& sh, value* em) {
34  	if (!em->def)
35  		return NULL;
36  
37  	node *predset = em->def;
38  	if (!predset->is_pred_set())
39  		return NULL;
40  
41  	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42  	convert_predset_to_set(sh, s);
43  
44  	predset->insert_after(s);
45  
46  	value* &d0 = s->dst[0];
47  	d0 = sh.create_temp_value();
48  	d0->def = s;
49  	return d0;
50  }
51  
convert_to_mov(alu_node & n,value * src,bool neg,bool abs)52  void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53  	n.src.resize(1);
54  	n.src[0] = src;
55  	n.bc.src[0].abs = abs;
56  	n.bc.src[0].neg = neg;
57  	n.bc.set_op(ALU_OP1_MOV);
58  }
59  
// Bind the handler to the shader being optimized and to its value table.
expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61  
get_const(const literal & l)62  value * expr_handler::get_const(const literal &l) {
63  	value *v = sh.get_const_value(l);
64  	if (!v->gvn_source)
65  		vt.add_value(v);
66  	return v;
67  }
68  
// Make dst equivalent to src for GVN purposes by pointing it at src's
// gvn representative.
void expr_handler::assign_source(value *dst, value *src) {
	dst->gvn_source = src->gvn_source;
}
72  
equal(value * l,value * r)73  bool expr_handler::equal(value *l, value *r) {
74  
75  	assert(l != r);
76  
77  	if (l->gvalue() == r->gvalue())
78  		return true;
79  
80  	if (l->def && r->def)
81  		return defs_equal(l, r);
82  
83  	if (l->is_rel() && r->is_rel())
84  		return ivars_equal(l, r);
85  
86  	return false;
87  }
88  
ivars_equal(value * l,value * r)89  bool expr_handler::ivars_equal(value* l, value* r) {
90  	if (l->rel->gvalue() == r->rel->gvalue()
91  			&& l->select == r->select) {
92  
93  		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
94  		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
95  
96  		// FIXME: replace this with more precise aliasing test
97  		return lv == rv;
98  	}
99  	return false;
100  }
101  
// Check whether two values are computed by structurally equivalent
// instructions. Only ALU instructions are currently compared; PRED_SET*
// instructions never compare equal because they also update predicate
// state as a side effect.
bool expr_handler::defs_equal(value* l, value* r) {

	node *d1 = l->def;
	node *d2 = r->def;

	if (d1->type != d2->type || d1->subtype != d2->subtype)
		return false;

	if (d1->is_pred_set() || d2->is_pred_set())
		return false;

	if (d1->type == NT_OP) {
		switch (d1->subtype) {
		case NST_ALU_INST:
			return ops_equal(
					static_cast<alu_node*>(d1),
					static_cast<alu_node*>(d2));
//		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
//			static_cast<fetch_node*>(d2);
//		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
//			static_cast<cf_node*>(d2);
		default:
			break;
		}
	}
	return false;
}
129  
try_fold(value * v)130  bool expr_handler::try_fold(value* v) {
131  	assert(!v->gvn_source);
132  
133  	if (v->def)
134  		try_fold(v->def);
135  
136  	if (v->gvn_source)
137  		return true;
138  
139  	return false;
140  }
141  
// Dispatch folding to the node-type-specific fold() overload.
bool expr_handler::try_fold(node* n) {
	return n->fold_dispatch(this);
}
145  
// Fold PHI/PSI nodes: when every incoming value is equivalent, the node is
// redundant and its destination can simply alias the common source.
bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {

		value *s = n.src[0];

		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		if (s->is_sgpr())
			return false;

		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			value *v = *I;
			if (!s->v_equal(v))
				return false;
		}

		assign_source(n.dst[0], s);
	} else {
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);

		// PSI sources come in triples; the value component is every third
		// entry starting at index 2 — compare them all against the first
		value *s = n.src[2];
		assert(s->gvn_source);

		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			value *v = *(I+2);
			if (!s->v_equal(v))
				return false;
		}
		assign_source(n.dst[0], s);
	}
	return true;
}
181  
// Container nodes are not foldable themselves; their contents are handled
// individually.
bool expr_handler::fold(container_node& n) {
	return false;
}
185  
// Fold SETcc-class ALU ops. Three cases can produce a known result:
// both sources constant (full evaluation), one source constant with
// abs/neg modifiers that make the comparison decidable regardless of the
// other operand, and identical sources with identical modifiers (integer
// compares only). On success the node becomes a MOV of the boolean result
// in the op's destination format.
bool expr_handler::fold_setcc(alu_node &n) {

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1 && n.dst[0]);

	unsigned flags = n.bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
	unsigned dst_type = flags & AF_DST_TYPE_MASK;

	bool cond_result;
	bool have_result = false;

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {
		// both sides constant — evaluate the comparison directly
		cond_result = evaluate_condition(flags, cv0, cv1);
		have_result = true;
	} else if (isc1) {
		if (cmp_type == AF_FLOAT_CMP) {
			// |x| >= 0, so some comparisons against a negative constant
			// are decidable without knowing x (NaN behavior presumably
			// matches hw here — unchanged from original)
			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
				if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
					cond_result = true;
					have_result = true;
				} else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
					cond_result = true;
					have_result = true;
				}
			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
				// -|x| <= 0, so it can never exceed a positive constant
				if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
					cond_result = false;
					have_result = true;
				} else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
					cond_result = false;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
			// unsigned x >= 0 is always true
			cond_result = true;
			have_result = true;
		}
	} else if (isc0) {
		// mirror of the isc1 case with the constant on the left
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
				if (cv0.f <= 0.0f && cc == AF_CC_GT) {
					cond_result = false;
					have_result = true;
				} else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
					cond_result = false;
					have_result = true;
				}
			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
				if (cv0.f >= 0.0f && cc == AF_CC_GE) {
					cond_result = true;
					have_result = true;
				} else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
					cond_result = true;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
			// 0 > unsigned x is always false
			cond_result = false;
			have_result = true;
		}
	} else if (v0 == v1) {
		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
			// NOTE can't handle float comparisons here because of NaNs
			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
			have_result = true;
		}
	}

	if (have_result) {
		literal result;

		// true is all-ones for integer destinations, 1.0f for float ones
		if (cond_result)
			result = dst_type != AF_FLOAT_DST ?
					literal(0xFFFFFFFFu) : literal(1.0f);
		else
			result = literal(0);

		convert_to_mov(n, sh.get_const_value(result));
		return fold_alu_op1(n);
	}

	return false;
}
289  
fold(alu_node & n)290  bool expr_handler::fold(alu_node& n) {
291  
292  	switch (n.bc.op_ptr->src_count) {
293  	case 1: return fold_alu_op1(n);
294  	case 2: return fold_alu_op2(n);
295  	case 3: return fold_alu_op3(n);
296  	default:
297  		assert(0);
298  	}
299  	return false;
300  }
301  
fold(fetch_node & n)302  bool expr_handler::fold(fetch_node& n) {
303  
304  	unsigned chan = 0;
305  	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
306  		value* &v = *I;
307  		if (v) {
308  			if (n.bc.dst_sel[chan] == SEL_0)
309  				assign_source(*I, get_const(0.0f));
310  			else if (n.bc.dst_sel[chan] == SEL_1)
311  				assign_source(*I, get_const(1.0f));
312  		}
313  		++chan;
314  	}
315  	return false;
316  }
317  
// CF instructions are never folded.
bool expr_handler::fold(cf_node& n) {
	return false;
}
321  
apply_alu_src_mod(const bc_alu & bc,unsigned src,literal & v)322  void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
323                                       literal &v) {
324  	const bc_alu_src &s = bc.src[src];
325  
326  	if (s.abs)
327  		v = fabs(v.f);
328  	if (s.neg)
329  		v = -v.f;
330  }
331  
apply_alu_dst_mod(const bc_alu & bc,literal & v)332  void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
333  	float omod_coeff[] = {2.0f, 4.0, 0.5f};
334  
335  	if (bc.omod)
336  		v = v.f * omod_coeff[bc.omod - 1];
337  	if (bc.clamp)
338  		v = float_clamp(v.f);
339  }
340  
args_equal(const vvec & l,const vvec & r)341  bool expr_handler::args_equal(const vvec &l, const vvec &r) {
342  
343  	assert(l.size() == r.size());
344  
345  	int s = l.size();
346  
347  	for (int k = 0; k < s; ++k) {
348  		if (!l[k]->v_equal(r[k]))
349  			return false;
350  	}
351  
352  	return true;
353  }
354  
// Structural equality of two ALU instructions: same opcode, same index
// mode, same clamp/omod, identical per-source abs/neg modifiers, and
// equivalent source values.
bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
	const bc_alu &b0 = l->bc;
	const bc_alu &b1 = r->bc;

	if (b0.op != b1.op)
		return false;

	unsigned src_count = b0.op_ptr->src_count;

	if (b0.index_mode != b1.index_mode)
		return false;

	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
			return false;

	for (unsigned s = 0; s < src_count; ++s) {
		const bc_alu_src &s0 = b0.src[s];
		const bc_alu_src &s1 = b1.src[s];

		if (s0.abs != s1.abs || s0.neg != s1.neg)
			return false;
	}
	return args_equal(l->src, r->src);
}
379  
fold_alu_op1(alu_node & n)380  bool expr_handler::fold_alu_op1(alu_node& n) {
381  
382  	assert(!n.src.empty());
383  	if (n.src.empty())
384  		return false;
385  
386  	value* v0 = n.src[0]->gvalue();
387  
388  	assert(v0 && n.dst[0]);
389  
390  	if (!v0->is_const()) {
391  		// handle (MOV -(MOV -x)) => (MOV x)
392  		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
393  				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
394  			alu_node *sd = static_cast<alu_node*>(v0->def);
395  			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
396  					sd->bc.src[0].neg) {
397  				n.src[0] = sd->src[0];
398  				n.bc.src[0].neg = 0;
399  				v0 = n.src[0]->gvalue();
400  			}
401  		}
402  
403  		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
404  				n.bc.op == ALU_OP1_MOVA_GPR_INT)
405  				&& n.bc.clamp == 0 && n.bc.omod == 0
406  				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
407  				n.src.size() == 1 /* RIM/SIM can be appended as additional values */) {
408  			assign_source(n.dst[0], v0);
409  			return true;
410  		}
411  		return false;
412  	}
413  
414  	literal dv, cv = v0->get_const_value();
415  	apply_alu_src_mod(n.bc, 0, cv);
416  
417  	switch (n.bc.op) {
418  	case ALU_OP1_CEIL: dv = ceil(cv.f); break;
419  	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
420  	case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
421  	case ALU_OP1_FLOOR: dv = floor(cv.f); break;
422  	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
423  	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
424  	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
425  	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
426  	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
427  	case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
428  	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
429  	case ALU_OP1_LOG_CLAMPED:
430  	case ALU_OP1_LOG_IEEE:
431  		if (cv.f != 0.0f)
432  			dv = log2(cv.f);
433  		else
434  			// don't fold to NAN, let the GPU handle it for now
435  			// (prevents degenerate LIT tests from failing)
436  			return false;
437  		break;
438  	case ALU_OP1_MOV: dv = cv; break;
439  	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
440  //	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
441  //	case ALU_OP1_MOVA_GPR_INT:
442  	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
443  	case ALU_OP1_PRED_SET_INV:
444  		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
445  	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
446  	case ALU_OP1_RECIPSQRT_CLAMPED:
447  	case ALU_OP1_RECIPSQRT_FF:
448  	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
449  	case ALU_OP1_RECIP_CLAMPED:
450  	case ALU_OP1_RECIP_FF:
451  	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
452  //	case ALU_OP1_RECIP_INT:
453  	case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
454  //	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
455  	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
456  	case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
457  	case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
458  
459  	default:
460  		return false;
461  	}
462  
463  	apply_alu_dst_mod(n.bc, dv);
464  	assign_source(n.dst[0], get_const(dv));
465  	return true;
466  }
467  
// Try to merge (ADD (MUL a, b), c) — with the MUL feeding either source —
// into a single MULADD. Only done when no abs modifiers / omod / clamp
// interfere, and when kcache port limits allow all three sources.
// Returns true if n was rewritten (and further folded as an op3).
bool expr_handler::fold_mul_add(alu_node *n) {

	bool ieee;
	value* v0 = n->src[0]->gvalue();

	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
			static_cast<alu_node*>(v0->def) : NULL;

	if (d0) {
		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d0->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		// abs can't be folded into MULADD sources; at most two kcache
		// constants may be used by one instruction
		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
				!d0->bc.clamp && !n->bc.omod &&
				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
						!n->src[1]->is_kcache())) {

			bool mul_neg = n->bc.src[0].neg;

			// n becomes MULADD(mul_a, mul_b, old_src1)
			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[1];
			n->bc.src[2] = n->bc.src[1];
			n->src[0] = d0->src[0];
			n->bc.src[0] = d0->bc.src[0];
			n->src[1] = d0->src[1];
			n->bc.src[1] = d0->bc.src[1];

			// negation that was applied to the MUL result distributes
			// onto one multiplicand
			n->bc.src[0].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	// symmetric case: the MUL feeds src1 of the ADD
	value* v1 = n->src[1]->gvalue();

	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
			static_cast<alu_node*>(v1->def) : NULL;

	if (d1) {
		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d1->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
				!d1->bc.clamp && !n->bc.omod &&
				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
						!n->src[0]->is_kcache())) {

			bool mul_neg = n->bc.src[1].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[0];
			n->bc.src[2] = n->bc.src[0];
			n->src[1] = d1->src[1];
			n->bc.src[1] = d1->bc.src[1];
			n->src[0] = d1->src[0];
			n->bc.src[0] = d1->bc.src[0];

			n->bc.src[1].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	return false;
}
547  
eval_const_op(unsigned op,literal & r,literal cv0,literal cv1)548  bool expr_handler::eval_const_op(unsigned op, literal &r,
549                                   literal cv0, literal cv1) {
550  
551  	switch (op) {
552  	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
553  	case ALU_OP2_ADDC_UINT:
554  		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
555  	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
556  	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
557  	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
558  	case ALU_OP2_BFM_INT:
559  		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
560  	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
561  	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
562  	case ALU_OP2_MAX:
563  	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
564  	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
565  	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
566  	case ALU_OP2_MIN:
567  	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
568  	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
569  	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
570  	case ALU_OP2_MUL:
571  	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
572  	case ALU_OP2_MULHI_INT:
573  		r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
574  	case ALU_OP2_MULHI_UINT:
575  		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
576  	case ALU_OP2_MULLO_INT:
577  		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
578  	case ALU_OP2_MULLO_UINT:
579  		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
580  	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
581  	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
582  	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
583  
584  	default:
585  		return false;
586  	}
587  
588  	return true;
589  }
590  
// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
// Walks up the def chain through same-op instructions, accumulating the
// constant operands into cr. MULADD is treated as MUL for the walk (the
// add operand stays in src[2]). For ADD, a pending negation distributes
// over the constant; for MUL it only flips the final sign.
bool expr_handler::fold_assoc(alu_node *n) {

	alu_node *a = n;
	literal cr;

	// -3 = nothing found, -2 = reset each iteration, -1 = fully constant,
	// 0/1 = index of the remaining non-const source in node 'a'
	int last_arg = -3;

	unsigned op = n->bc.op;
	bool allow_neg = false, cur_neg = false;
	bool distribute_neg = false;

	switch(op) {
	case ALU_OP2_ADD:
		distribute_neg = true;
		allow_neg = true;
		break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE:
		allow_neg = true;
		break;
	case ALU_OP3_MULADD:
		allow_neg = true;
		op = ALU_OP2_MUL;
		break;
	case ALU_OP3_MULADD_IEEE:
		allow_neg = true;
		op = ALU_OP2_MUL_IEEE;
		break;
	default:
		if (n->bc.op_ptr->src_count != 2)
			return false;
	}

	// check if we can evaluate the op
	if (!eval_const_op(op, cr, literal(0), literal(0)))
		return false;

	while (true) {

		value *v0 = a->src[0]->gvalue();
		value *v1 = a->src[1]->gvalue();

		last_arg = -2;

		if (v1->is_const()) {
			literal arg = v1->get_const_value();
			apply_alu_src_mod(a->bc, 1, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			// first constant seeds the accumulator, later ones combine
			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			// continue the walk through src0's def if it's the same op
			// (MUL_IEEE chains may absorb plain MULs)
			if (v0->def) {
				alu_node *d0 = static_cast<alu_node*>(v0->def);
				if ((d0->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d0->is_alu_op(ALU_OP2_MUL))) &&
						!d0->bc.omod && !d0->bc.clamp &&
						!a->bc.src[0].abs &&
						(!a->bc.src[0].neg || allow_neg)) {
					cur_neg ^= a->bc.src[0].neg;
					a = d0;
					continue;
				}
			}
			last_arg = 0;

		}

		if (v0->is_const()) {
			literal arg = v0->get_const_value();
			apply_alu_src_mod(a->bc, 0, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			// both sources const in the current node — the chain ends with
			// a fully constant result
			if (last_arg == 0) {
				eval_const_op(op, cr, cr, arg);
				last_arg = -1;
				break;
			}

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v1->def) {
				alu_node *d1 = static_cast<alu_node*>(v1->def);
				if ((d1->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d1->is_alu_op(ALU_OP2_MUL))) &&
						!d1->bc.omod && !d1->bc.clamp &&
						!a->bc.src[1].abs &&
						(!a->bc.src[1].neg || allow_neg)) {
					cur_neg ^= a->bc.src[1].neg;
					a = d1;
					continue;
				}
			}

			last_arg = 1;
		}

		break;
	};

	if (last_arg == -1) {
		// result is const
		apply_alu_dst_mod(n->bc, cr);

		if (n->bc.op == op) {
			convert_to_mov(*n, sh.get_const_value(cr));
			fold_alu_op1(*n);
			return true;
		} else { // MULADD => ADD
			n->src[0] = n->src[2];
			n->bc.src[0] = n->bc.src[2];
			n->src[1] = sh.get_const_value(cr);
			memset(&n->bc.src[1], 0, sizeof(bc_alu_src));

			n->src.resize(2);
			n->bc.set_op(ALU_OP2_ADD);
		}
	} else if (last_arg >= 0) {
		// rewrite n as (op remaining_value, accumulated_const)
		n->src[0] = a->src[last_arg];
		n->bc.src[0] = a->bc.src[last_arg];
		n->bc.src[0].neg ^= cur_neg;
		n->src[1] = sh.get_const_value(cr);
		memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
	}

	// n may have been simplified in place, but folding isn't complete —
	// let the caller continue with the rewritten node
	return false;
}
728  
// Fold a two-source ALU op: SETcc ops go to fold_setcc; associative chains
// are collapsed (unless safe_math); equal-source identities and const
// identities (x+0, x*1, x*0, ...) are rewritten to MOVs; fully constant
// operands are evaluated via eval_const_op.
bool expr_handler::fold_alu_op2(alu_node& n) {

	if (n.src.size() < 2)
		return false;

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_SET) {
		return fold_setcc(n);
	}

	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1);

	// handle some operations with equal args, e.g. x + x => x * 2
	if (v0 == v1) {
		if (n.bc.src[0].neg == n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
			case ALU_OP2_MAX:
				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
				if (!sh.safe_math) {
					n.src[1] = sh.get_const_value(2.0f);
					memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
					n.bc.set_op(ALU_OP2_MUL);
					return fold_alu_op2(n);
				}
				break;
			}
		}
		if (n.bc.src[0].neg != n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:  // (ADD x, -x) => (MOV 0)
				if (!sh.safe_math) {
					convert_to_mov(n, sh.get_const_value(literal(0)));
					return fold_alu_op1(n);
				}
				break;
			}
		}
	}

	// try to merge an ADD with a feeding MUL into MULADD
	if (n.bc.op == ALU_OP2_ADD) {
		if (fold_mul_add(&n))
			return true;
	}

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	if (!isc0 && !isc1)
		return false;

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {

		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
			return false;

	} else { // one source is const

		// identity / absorbing element rewrites; the constant already has
		// its source modifiers applied
		if (isc0 && cv0 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_SUB_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc0 && cv0 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
				return fold_alu_op1(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}
876  
// Evaluate the comparison encoded in alu_cnd_flags (condition code +
// comparison type) on two constant operands.
bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
                                      literal s1, literal s2) {

	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
	unsigned cc = alu_cnd_flags & AF_CC_MASK;

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.f == s2.f;
		case AF_CC_GT: return s1.f >  s2.f;
		case AF_CC_GE: return s1.f >= s2.f;
		case AF_CC_NE: return s1.f != s2.f;
		case AF_CC_LT: return s1.f <  s2.f;
		case AF_CC_LE: return s1.f <= s2.f;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.i == s2.i;
		case AF_CC_GT: return s1.i >  s2.i;
		case AF_CC_GE: return s1.i >= s2.i;
		case AF_CC_NE: return s1.i != s2.i;
		case AF_CC_LT: return s1.i <  s2.i;
		case AF_CC_LE: return s1.i <= s2.i;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.u == s2.u;
		case AF_CC_GT: return s1.u >  s2.u;
		case AF_CC_GE: return s1.u >= s2.u;
		case AF_CC_NE: return s1.u != s2.u;
		case AF_CC_LT: return s1.u <  s2.u;
		case AF_CC_LE: return s1.u <= s2.u;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	default:
		assert(!"invalid cmp_type");
		return false;
	}
}
928  
// Fold a three-source ALU op (CNDxx and MULADD families): associative
// collapsing, CMOV selection when the condition or both branches are
// known, distribution (MULADD a, x, MUL(x, b)) => (MUL x, ADD(a, b)),
// full constant evaluation, and MULADD strength reductions.
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
		return false;

	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();
	value* v2 = n.src[2]->gvalue();

	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	literal dv, cv0, cv1, cv2;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc2) {
		cv2 = v2->get_const_value();
		apply_alu_src_mod(n.bc, 2, cv2);
	}

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_CMOV) {
		int src = 0;

		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
			// result doesn't depend on condition, convert to MOV
			src = 1;
		} else if (isc0) {
			// src0 is const, condition can be evaluated, convert to MOV
			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
					AF_CMP_TYPE_MASK), cv0, literal(0));
			src = cond ? 1 : 2;
		}

		if (src) {
			// if src is selected, convert to MOV
			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
			return fold_alu_op1(n);
		}
	}

	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
			n.bc.op == ALU_OP3_MULADD_IEEE)) {

		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {

			alu_node *md = static_cast<alu_node*>(v2->def);
			value *mv0 = md->src[0]->gvalue();
			value *mv1 = md->src[1]->gvalue();

			// es0: which of n's multiplicands matches; es1: which of the
			// feeding MUL's sources it matches
			int es0 = -1, es1;

			if (v0 == mv0) {
				es0 = 0;
				es1 = 0;
			} else if (v0 == mv1) {
				es0 = 0;
				es1 = 1;
			} else if (v1 == mv0) {
				es0 = 1;
				es1 = 0;
			} else if (v1 == mv1) {
				es0 = 1;
				es1 = 1;
			}

			if (es0 != -1) {
				// the non-shared operands become the new ADD's sources
				value *va0 = es0 == 0 ? v1 : v0;
				value *va1 = es1 == 0 ? mv1 : mv0;

				alu_node *add = sh.create_alu();
				add->bc.set_op(ALU_OP2_ADD);

				add->dst.resize(1);
				add->src.resize(2);

				value *t = sh.create_temp_value();
				t->def = add;
				add->dst[0] = t;
				add->src[0] = va0;
				add->src[1] = va1;
				add->bc.src[0] = n.bc.src[!es0];
				add->bc.src[1] = md->bc.src[!es1];

				// fold the signs of the add operand and of the two
				// occurrences of the shared factor into the new ADD
				add->bc.src[1].neg ^= n.bc.src[2].neg ^
						(n.bc.src[es0].neg != md->bc.src[es1].neg);

				n.insert_before(add);
				vt.add_value(t);

				t = t->gvalue();

				if (es0 == 1) {
					n.src[0] = n.src[1];
					n.bc.src[0] = n.bc.src[1];
				}

				n.src[1] = t;
				memset(&n.bc.src[1], 0, sizeof(bc_alu_src));

				n.src.resize(2);

				n.bc.set_op(op);
				return fold_alu_op2(n);
			}
		}
	}

	if (!isc0 && !isc1 && !isc2)
		return false;

	if (isc0 && isc1 && isc2) {
		switch (n.bc.op) {
		case ALU_OP3_MULADD_IEEE:
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;

		// TODO

		default:
			return false;
		}
	} else {
		// (MULADD c0, c1, x) => (ADD (c0*c1), x)
		if (isc0 && isc1) {
			switch (n.bc.op) {
			case ALU_OP3_MULADD:
			case ALU_OP3_MULADD_IEEE:
				dv = cv0.f * cv1.f;
				n.bc.set_op(ALU_OP2_ADD);
				n.src[0] = sh.get_const_value(dv);
				memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
				n.src[1] = n.src[2];
				n.bc.src[1] = n.bc.src[2];
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		// (MULADD 0, x, y) / (MULADD x, 0, y) => (MOV y)
		if (n.bc.op == ALU_OP3_MULADD) {
			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
				convert_to_mov(n, n.src[2], n.bc.src[2].neg,  n.bc.src[2].abs);
				return fold_alu_op1(n);
			}
		}

		// (MULADD x, c, +-x) => (MUL x, c+-1)
		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

			if (isc1 && v0 == v2) {
				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
				n.src[1] = sh.get_const_value(cv1);
				n.bc.src[1].neg = 0;
				n.bc.src[1].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			} else if (isc0 && v1 == v2) {
				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
				n.src[0] = sh.get_const_value(cv0);
				n.bc.src[0].neg = 0;
				n.bc.src[0].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}
1125  
invert_setcc_condition(unsigned cc,bool & swap_args)1126  unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
1127  	unsigned ncc = 0;
1128  
1129  	switch (cc) {
1130  	case AF_CC_E: ncc = AF_CC_NE; break;
1131  	case AF_CC_NE: ncc = AF_CC_E; break;
1132  	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
1133  	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
1134  	default:
1135  		assert(!"unexpected condition code");
1136  		break;
1137  	}
1138  	return ncc;
1139  }
1140  
get_setcc_op(unsigned cc,unsigned cmp_type,bool int_dst)1141  unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
1142  
1143  	if (int_dst && cmp_type == AF_FLOAT_CMP) {
1144  		switch (cc) {
1145  		case AF_CC_E: return ALU_OP2_SETE_DX10;
1146  		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
1147  		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
1148  		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
1149  		}
1150  	} else {
1151  
1152  		switch(cmp_type) {
1153  		case AF_FLOAT_CMP: {
1154  			switch (cc) {
1155  			case AF_CC_E: return ALU_OP2_SETE;
1156  			case AF_CC_NE: return ALU_OP2_SETNE;
1157  			case AF_CC_GT: return ALU_OP2_SETGT;
1158  			case AF_CC_GE: return ALU_OP2_SETGE;
1159  			}
1160  			break;
1161  		}
1162  		case AF_INT_CMP: {
1163  			switch (cc) {
1164  			case AF_CC_E: return ALU_OP2_SETE_INT;
1165  			case AF_CC_NE: return ALU_OP2_SETNE_INT;
1166  			case AF_CC_GT: return ALU_OP2_SETGT_INT;
1167  			case AF_CC_GE: return ALU_OP2_SETGE_INT;
1168  			}
1169  			break;
1170  		}
1171  		case AF_UINT_CMP: {
1172  			switch (cc) {
1173  			case AF_CC_E: return ALU_OP2_SETE_INT;
1174  			case AF_CC_NE: return ALU_OP2_SETNE_INT;
1175  			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
1176  			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
1177  			}
1178  			break;
1179  		}
1180  		}
1181  	}
1182  
1183  	assert(!"unexpected cc&cmp_type combination");
1184  	return ~0u;
1185  }
1186  
get_predsetcc_op(unsigned cc,unsigned cmp_type)1187  unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
1188  
1189  	switch(cmp_type) {
1190  	case AF_FLOAT_CMP: {
1191  		switch (cc) {
1192  		case AF_CC_E: return ALU_OP2_PRED_SETE;
1193  		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
1194  		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
1195  		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
1196  		}
1197  		break;
1198  	}
1199  	case AF_INT_CMP: {
1200  		switch (cc) {
1201  		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1202  		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1203  		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
1204  		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
1205  		}
1206  		break;
1207  	}
1208  	case AF_UINT_CMP: {
1209  		switch (cc) {
1210  		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1211  		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1212  		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
1213  		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
1214  		}
1215  		break;
1216  	}
1217  	}
1218  
1219  	assert(!"unexpected cc&cmp_type combination");
1220  	return ~0u;
1221  }
1222  
get_killcc_op(unsigned cc,unsigned cmp_type)1223  unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
1224  
1225  	switch(cmp_type) {
1226  	case AF_FLOAT_CMP: {
1227  		switch (cc) {
1228  		case AF_CC_E: return ALU_OP2_KILLE;
1229  		case AF_CC_NE: return ALU_OP2_KILLNE;
1230  		case AF_CC_GT: return ALU_OP2_KILLGT;
1231  		case AF_CC_GE: return ALU_OP2_KILLGE;
1232  		}
1233  		break;
1234  	}
1235  	case AF_INT_CMP: {
1236  		switch (cc) {
1237  		case AF_CC_E: return ALU_OP2_KILLE_INT;
1238  		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1239  		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
1240  		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
1241  		}
1242  		break;
1243  	}
1244  	case AF_UINT_CMP: {
1245  		switch (cc) {
1246  		case AF_CC_E: return ALU_OP2_KILLE_INT;
1247  		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1248  		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
1249  		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
1250  		}
1251  		break;
1252  	}
1253  	}
1254  
1255  	assert(!"unexpected cc&cmp_type combination");
1256  	return ~0u;
1257  }
1258  
get_cndcc_op(unsigned cc,unsigned cmp_type)1259  unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
1260  
1261  	switch(cmp_type) {
1262  	case AF_FLOAT_CMP: {
1263  		switch (cc) {
1264  		case AF_CC_E: return ALU_OP3_CNDE;
1265  		case AF_CC_GT: return ALU_OP3_CNDGT;
1266  		case AF_CC_GE: return ALU_OP3_CNDGE;
1267  		}
1268  		break;
1269  	}
1270  	case AF_INT_CMP: {
1271  		switch (cc) {
1272  		case AF_CC_E: return ALU_OP3_CNDE_INT;
1273  		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
1274  		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
1275  		}
1276  		break;
1277  	}
1278  	}
1279  
1280  	assert(!"unexpected cc&cmp_type combination");
1281  	return ~0u;
1282  }
1283  
1284  
convert_predset_to_set(shader & sh,alu_node * a)1285  void convert_predset_to_set(shader& sh, alu_node* a) {
1286  
1287  	unsigned flags = a->bc.op_ptr->flags;
1288  	unsigned cc = flags & AF_CC_MASK;
1289  	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
1290  
1291  	bool swap_args = false;
1292  
1293  	cc = invert_setcc_condition(cc, swap_args);
1294  
1295  	unsigned newop = get_setcc_op(cc, cmp_type, true);
1296  
1297  	a->dst.resize(1);
1298  	a->bc.set_op(newop);
1299  
1300  	if (swap_args) {
1301  		std::swap(a->src[0], a->src[1]);
1302  		std::swap(a->bc.src[0], a->bc.src[1]);
1303  	}
1304  
1305  	a->bc.update_exec_mask = 0;
1306  	a->bc.update_pred = 0;
1307  }
1308  
1309  } // namespace r600_sb
1310