/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include <cmath>

#include "sb_shader.h"

namespace r600_sb {

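// For an exec-mask value produced by a PRED_SETcc, build a plain boolean
// select value: clone the PRED_SET, rewrite the clone into the equivalent
// SETcc (see convert_predset_to_set below), and return its new temp dst.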
value* get_select_value_for_em(shader& sh, value* em) {
	if (!em->def)
		return NULL;

	node *predset = em->def;
	if (!predset->is_pred_set())
		return NULL;

	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
	convert_predset_to_set(sh, s);

	predset->insert_after(s);

	value* &d0 = s->dst[0];
	d0 = sh.create_temp_value();
	d0->def = s;
	return d0;
}

void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
	n.src.resize(1);
	n.src[0] = src;
	n.bc.src[0].abs = abs;
	n.bc.src[0].neg = neg;
	n.bc.set_op(ALU_OP1_MOV);
}

expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}

value * expr_handler::get_const(const literal &l) {
	value *v = sh.get_const_value(l);
	if (!v->gvn_source)
		vt.add_value(v);
	return v;
}

void expr_handler::assign_source(value *dst, value *src) {
	dst->gvn_source = src->gvn_source;
}

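// Value equivalence test used by value numbering: values are equal if they
// already share a gvalue, are defined by equivalent operations (defs_equal),
// or are equivalent relative accesses (ivars_equal). LDS accesses are never
// merged.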
bool expr_handler::equal(value *l, value *r) {

	assert(l != r);

	if (l->is_lds_access() || r->is_lds_access())
		return false;
	if (l->gvalue() == r->gvalue())
		return true;

	if (l->def && r->def)
		return defs_equal(l, r);

	if (l->is_rel() && r->is_rel())
		return ivars_equal(l, r);

	return false;
}

bool expr_handler::ivars_equal(value* l, value* r) {
	if (l->rel->gvalue() == r->rel->gvalue()
			&& l->select == r->select) {

		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;

		// FIXME: replace this with more precise aliasing test
		return lv == rv;
	}
	return false;
}

bool expr_handler::defs_equal(value* l, value* r) {

	node *d1 = l->def;
	node *d2 = r->def;

	if (d1->type != d2->type || d1->subtype != d2->subtype)
		return false;

	if (d1->is_pred_set() || d2->is_pred_set())
		return false;

	if (d1->type == NT_OP) {
		switch (d1->subtype) {
		case NST_ALU_INST:
			return ops_equal(
					static_cast<alu_node*>(d1),
					static_cast<alu_node*>(d2));
//		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
//				static_cast<fetch_node*>(d2);
//		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
//				static_cast<cf_node*>(d2);
		default:
			break;
		}
	}
	return false;
}

bool expr_handler::try_fold(value* v) {
	assert(!v->gvn_source);

	if (v->def)
		try_fold(v->def);

	if (v->gvn_source)
		return true;

	return false;
}

bool expr_handler::try_fold(node* n) {
	return n->fold_dispatch(this);
}

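// Fold PHI/PSI copies: if all incoming values are equivalent, forward the
// single source to dst. PSI sources come in triples, with the value itself
// in the third slot of each triple.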
bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {

		value *s = n.src[0];

		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		if (s->is_sgpr())
			return false;

		for (vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			value *v = *I;
			if (!s->v_equal(v))
				return false;
		}

		assign_source(n.dst[0], s);
	} else {
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);

		value *s = n.src[2];
		assert(s->gvn_source);

		for (vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			value *v = *(I+2);
			if (!s->v_equal(v))
				return false;
		}
		assign_source(n.dst[0], s);
	}
	return true;
}

bool expr_handler::fold(container_node& n) {
	return false;
}

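// Try to fold a SETcc-family op. Besides the fully-constant case, a few
// single-constant cases are decidable from the abs/neg modifiers alone:
// e.g. with abs and no neg the non-const side is known non-negative, so
// (SETNE |x|, c) is always true for any constant c < 0.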
bool expr_handler::fold_setcc(alu_node &n) {

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1 && n.dst[0]);

	unsigned flags = n.bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
	unsigned dst_type = flags & AF_DST_TYPE_MASK;

	bool cond_result;
	bool have_result = false;

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {
		cond_result = evaluate_condition(flags, cv0, cv1);
		have_result = true;
	} else if (isc1) {
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
				if (cv1.f < 0.0f && cc == AF_CC_NE) {
					cond_result = true;
					have_result = true;
				}
			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
				if (cv1.f > 0.0f && cc == AF_CC_E) {
					cond_result = false;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
			cond_result = true;
			have_result = true;
		}
	} else if (isc0) {
		if (cmp_type == AF_FLOAT_CMP) {
			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
				if (cv0.f < 0.0f && (cc == AF_CC_E)) {
					cond_result = false;
					have_result = true;
				}
			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
				if (cv0.f > 0.0f && cc == AF_CC_NE) {
					cond_result = true;
					have_result = true;
				}
			}
		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
			cond_result = false;
			have_result = true;
		}
	} else if (v0 == v1) {
		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
			// NOTE can't handle float comparisons here because of NaNs
			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
			have_result = true;
		}
	}

	if (have_result) {
		literal result;

		if (cond_result)
			result = dst_type != AF_FLOAT_DST ?
					literal(0xFFFFFFFFu) : literal(1.0f);
		else
			result = literal(0);

		convert_to_mov(n, sh.get_const_value(result));
		return fold_alu_op1(n);
	}

	return false;
}

bool expr_handler::fold(alu_node& n) {

	switch (n.bc.op_ptr->src_count) {
	case 1: return fold_alu_op1(n);
	case 2: return fold_alu_op2(n);
	case 3: return fold_alu_op3(n);
	default:
		assert(0);
	}
	return false;
}

bool expr_handler::fold(fetch_node& n) {

	unsigned chan = 0;
	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
		value* &v = *I;
		if (v) {
			if (n.bc.dst_sel[chan] == SEL_0)
				assign_source(*I, get_const(0.0f));
			else if (n.bc.dst_sel[chan] == SEL_1)
				assign_source(*I, get_const(1.0f));
		}
		++chan;
	}
	return false;
}

bool expr_handler::fold(cf_node& n) {
	return false;
}

void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
		literal &v) {
	const bc_alu_src &s = bc.src[src];

	if (s.abs)
		v = fabsf(v.f);
	if (s.neg)
		v = -v.f;
}

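// Output modifier: omod values 1..3 scale the result by 2, 4, and 0.5
// respectively; clamp restricts it to [0, 1].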
void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
	const float omod_coeff[] = {2.0f, 4.0f, 0.5f};

	if (bc.omod)
		v = v.f * omod_coeff[bc.omod - 1];
	if (bc.clamp)
		v = float_clamp(v.f);
}

bool expr_handler::args_equal(const vvec &l, const vvec &r) {

	assert(l.size() == r.size());

	int s = l.size();

	for (int k = 0; k < s; ++k) {
		if (!l[k]->v_equal(r[k]))
			return false;
	}

	return true;
}

bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
	const bc_alu &b0 = l->bc;
	const bc_alu &b1 = r->bc;

	if (b0.op != b1.op)
		return false;

	unsigned src_count = b0.op_ptr->src_count;

	if (b0.index_mode != b1.index_mode)
		return false;

	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
		return false;

	for (unsigned s = 0; s < src_count; ++s) {
		const bc_alu_src &s0 = b0.src[s];
		const bc_alu_src &s1 = b1.src[s];

		if (s0.abs != s1.abs || s0.neg != s1.neg)
			return false;
	}
	return args_equal(l->src, r->src);
}

bool expr_handler::fold_alu_op1(alu_node& n) {

	assert(!n.src.empty());
	if (n.src.empty())
		return false;

	/* don't fold LDS instructions */
	if (n.bc.op_ptr->flags & AF_LDS)
		return false;

	value* v0 = n.src[0]->gvalue();

	if (v0->is_lds_oq() || v0->is_lds_access())
		return false;
	assert(v0 && n.dst[0]);

	if (!v0->is_const()) {
		// handle (MOV -(MOV -x)) => (MOV x)
		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[0].abs
				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
			alu_node *sd = static_cast<alu_node*>(v0->def);
			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
					sd->bc.src[0].neg) {
				n.src[0] = sd->src[0];
				n.bc.src[0].neg = 0;
				v0 = n.src[0]->gvalue();
			}
		}

		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
				n.bc.op == ALU_OP1_MOVA_GPR_INT)
				&& n.bc.clamp == 0 && n.bc.omod == 0
				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
				n.src.size() == 1 /* RIM/SIM can be appended as additional values */
				&& n.dst[0]->no_reladdr_conflict_with(v0)) {
			assign_source(n.dst[0], v0);
			return true;
		}
		return false;
	}

	literal dv, cv = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv);

	switch (n.bc.op) {
	case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
	case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
	case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
	case ALU_OP1_LOG_CLAMPED:
	case ALU_OP1_LOG_IEEE:
		if (cv.f != 0.0f)
			dv = log2f(cv.f);
		else
			// don't fold to NAN, let the GPU handle it for now
			// (prevents degenerate LIT tests from failing)
			return false;
		break;
	case ALU_OP1_MOV: dv = cv; break;
	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
//	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
//	case ALU_OP1_MOVA_GPR_INT:
	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
	case ALU_OP1_PRED_SET_INV:
		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
	case ALU_OP1_RECIPSQRT_CLAMPED:
	case ALU_OP1_RECIPSQRT_FF:
	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
	case ALU_OP1_RECIP_CLAMPED:
	case ALU_OP1_RECIP_FF:
	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
//	case ALU_OP1_RECIP_INT:
	case ALU_OP1_RECIP_UINT: {
		if (!cv.u)
			return false;
		dv.u = (1ull << 32) / cv.u;
		break;
	}
//	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
	case ALU_OP1_TRUNC: dv = truncf(cv.f); break;

	default:
		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

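// Combine an ADD with a MUL feeding one of its operands into a single
// MULADD, e.g. (ADD (MUL a, b), c) => (MULADD a, b, c). Only done when no
// abs/omod/clamp modifiers interfere and at most two kcache operands result.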
bool expr_handler::fold_mul_add(alu_node *n) {

	bool ieee;
	value* v0 = n->src[0]->gvalue();

	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
			static_cast<alu_node*>(v0->def) : NULL;

	if (d0) {
		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d0->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
				!d0->bc.clamp && !n->bc.omod &&
				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
						!n->src[1]->is_kcache())) {

			bool mul_neg = n->bc.src[0].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[1];
			n->bc.src[2] = n->bc.src[1];
			n->src[0] = d0->src[0];
			n->bc.src[0] = d0->bc.src[0];
			n->src[1] = d0->src[1];
			n->bc.src[1] = d0->bc.src[1];

			n->bc.src[0].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	value* v1 = n->src[1]->gvalue();

	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
			static_cast<alu_node*>(v1->def) : NULL;

	if (d1) {
		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d1->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
				!d1->bc.clamp && !n->bc.omod &&
				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
						!n->src[0]->is_kcache())) {

			bool mul_neg = n->bc.src[1].neg;

			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[0];
			n->bc.src[2] = n->bc.src[0];
			n->src[1] = d1->src[1];
			n->bc.src[1] = d1->bc.src[1];
			n->src[0] = d1->src[0];
			n->bc.src[0] = d1->bc.src[0];

			n->bc.src[1].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	return false;
}

bool expr_handler::eval_const_op(unsigned op, literal &r,
		literal cv0, literal cv1) {

	switch (op) {
	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
	case ALU_OP2_ADDC_UINT:
		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
	case ALU_OP2_BFM_INT:
		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
	case ALU_OP2_MAX:
	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
	case ALU_OP2_MIN:
	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
	case ALU_OP2_MULHI_INT:
		r = (int32_t)(((int64_t)cv0.i * cv1.i)>>32); break;
	case ALU_OP2_MULHI_UINT:
		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULLO_INT:
		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_MULLO_UINT:
		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;

	default:
		return false;
	}

	return true;
}

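// Walking up the def chain, constants are accumulated into cr; a neg
// modifier on the chained operand is distributed over ADD args or carried
// through MUL via cur_neg. For MULADD only the MUL part of the chain is
// folded (op is reduced to the corresponding MUL).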
// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
bool expr_handler::fold_assoc(alu_node *n) {

	alu_node *a = n;
	literal cr;

	int last_arg = -3;

	unsigned op = n->bc.op;
	bool allow_neg = false, cur_neg = false;
	bool distribute_neg = false;

	switch (op) {
	case ALU_OP2_ADD:
		distribute_neg = true;
		allow_neg = true;
		break;
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE:
		allow_neg = true;
		break;
	case ALU_OP3_MULADD:
		allow_neg = true;
		op = ALU_OP2_MUL;
		break;
	case ALU_OP3_MULADD_IEEE:
		allow_neg = true;
		op = ALU_OP2_MUL_IEEE;
		break;
	default:
		if (n->bc.op_ptr->src_count != 2)
			return false;
	}

	// check if we can evaluate the op
	if (!eval_const_op(op, cr, literal(0), literal(0)))
		return false;

	while (true) {

		value *v0 = a->src[0]->gvalue();
		value *v1 = a->src[1]->gvalue();

		last_arg = -2;

		if (v1->is_const()) {
			literal arg = v1->get_const_value();
			apply_alu_src_mod(a->bc, 1, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v0->def) {
				alu_node *d0 = static_cast<alu_node*>(v0->def);
				if ((d0->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d0->is_alu_op(ALU_OP2_MUL))) &&
						!d0->bc.omod && !d0->bc.clamp &&
						!a->bc.src[0].abs &&
						(!a->bc.src[0].neg || allow_neg)) {
					cur_neg ^= a->bc.src[0].neg;
					a = d0;
					continue;
				}
			}
			last_arg = 0;

		}

		if (v0->is_const()) {
			literal arg = v0->get_const_value();
			apply_alu_src_mod(a->bc, 0, arg);
			if (cur_neg && distribute_neg)
				arg.f = -arg.f;

			if (last_arg == 0) {
				eval_const_op(op, cr, cr, arg);
				last_arg = -1;
				break;
			}

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			if (v1->def) {
				alu_node *d1 = static_cast<alu_node*>(v1->def);
				if ((d1->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d1->is_alu_op(ALU_OP2_MUL))) &&
						!d1->bc.omod && !d1->bc.clamp &&
						!a->bc.src[1].abs &&
						(!a->bc.src[1].neg || allow_neg)) {
					cur_neg ^= a->bc.src[1].neg;
					a = d1;
					continue;
				}
			}

			last_arg = 1;
		}

		break;
	}

	if (last_arg == -1) {
		// result is const
		apply_alu_dst_mod(n->bc, cr);

		if (n->bc.op == op) {
			convert_to_mov(*n, sh.get_const_value(cr));
			fold_alu_op1(*n);
			return true;
		} else { // MULADD => ADD
			n->src[0] = n->src[2];
			n->bc.src[0] = n->bc.src[2];
			n->src[1] = sh.get_const_value(cr);
			n->bc.src[1].clear();

			n->src.resize(2);
			n->bc.set_op(ALU_OP2_ADD);
		}
	} else if (last_arg >= 0) {
		n->src[0] = a->src[last_arg];
		n->bc.src[0] = a->bc.src[last_arg];
		n->bc.src[0].neg ^= cur_neg;
		n->src[1] = sh.get_const_value(cr);
		n->bc.src[1].clear();
	}

	return false;
}

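// Algebraic simplifications for two-source ops: (MIN x, x) => (MOV x),
// (MUL x, 1) => (MOV x), (ADD x, 0) => (MOV x), and so on; fully constant
// operands are evaluated via eval_const_op. Float rewrites that are not
// exact, such as (ADD x, x) => (MUL x, 2) and (ADD x, -x) => (MOV 0), are
// skipped when safe_math is set.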
bool expr_handler::fold_alu_op2(alu_node& n) {

	if (n.src.size() < 2)
		return false;

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_SET) {
		return fold_setcc(n);
	}

	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1);

	// handle some operations with equal args, e.g. x + x => x * 2
	if (v0 == v1) {
		if (n.bc.src[0].neg == n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
			case ALU_OP2_MIN_DX10:
			case ALU_OP2_MAX:
			case ALU_OP2_MAX_DX10:
				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
				if (!sh.safe_math) {
					n.src[1] = sh.get_const_value(2.0f);
					n.bc.src[1].clear();
					n.bc.set_op(ALU_OP2_MUL);
					return fold_alu_op2(n);
				}
				break;
			}
		}
		if (n.bc.src[0].neg != n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0)
				if (!sh.safe_math) {
					convert_to_mov(n, sh.get_const_value(literal(0)));
					return fold_alu_op1(n);
				}
				break;
			}
		}
	}

	if (n.bc.op == ALU_OP2_ADD) {
		if (fold_mul_add(&n))
			return true;
	}

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	if (!isc0 && !isc1)
		return false;

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {

		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
			return false;

	} else { // one source is const

		if (isc0 && cv0 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_SUB_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc0 && cv0 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
		literal s1, literal s2) {

	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
	unsigned cc = alu_cnd_flags & AF_CC_MASK;

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.f == s2.f;
		case AF_CC_GT: return s1.f > s2.f;
		case AF_CC_GE: return s1.f >= s2.f;
		case AF_CC_NE: return s1.f != s2.f;
		case AF_CC_LT: return s1.f < s2.f;
		case AF_CC_LE: return s1.f <= s2.f;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.i == s2.i;
		case AF_CC_GT: return s1.i > s2.i;
		case AF_CC_GE: return s1.i >= s2.i;
		case AF_CC_NE: return s1.i != s2.i;
		case AF_CC_LT: return s1.i < s2.i;
		case AF_CC_LE: return s1.i <= s2.i;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E : return s1.u == s2.u;
		case AF_CC_GT: return s1.u > s2.u;
		case AF_CC_GE: return s1.u >= s2.u;
		case AF_CC_NE: return s1.u != s2.u;
		case AF_CC_LT: return s1.u < s2.u;
		case AF_CC_LE: return s1.u <= s2.u;
		default:
			assert(!"invalid condition code");
			return false;
		}
	}
	default:
		assert(!"invalid cmp_type");
		return false;
	}
}

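// Three-source folding, mostly for MULADD and the CNDcc family. Notable
// rewrites: (MULADD a, x, (MUL b, x)) => (MUL x, (ADD a, b)) when safe_math
// is off, and (MULADD x, c1, +/-x) => (MUL x, c1 +/- 1) for a constant c1.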
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
		return false;

	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
		if (n.src.size() < 3)
			return fold_alu_op2(n);
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();
	value* v2 = n.src[2]->gvalue();

	/* LDS instructions look like op3 with no dst - don't fold. */
	if (!n.dst[0])
		return false;
	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	literal dv, cv0, cv1, cv2;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc2) {
		cv2 = v2->get_const_value();
		apply_alu_src_mod(n.bc, 2, cv2);
	}

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_CMOV) {
		int src = 0;

		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
			// result doesn't depend on condition, convert to MOV
			src = 1;
		} else if (isc0) {
			// src0 is const, condition can be evaluated, convert to MOV
			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
					AF_CMP_TYPE_MASK), cv0, literal(0));
			src = cond ? 1 : 2;
		}

		if (src) {
			// if src is selected, convert to MOV
			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
			return fold_alu_op1(n);
		}
	}

	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
			n.bc.op == ALU_OP3_MULADD_IEEE)) {

		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {

			alu_node *md = static_cast<alu_node*>(v2->def);
			value *mv0 = md->src[0]->gvalue();
			value *mv1 = md->src[1]->gvalue();

			int es0 = -1, es1 = -1;

			if (v0 == mv0) {
				es0 = 0;
				es1 = 0;
			} else if (v0 == mv1) {
				es0 = 0;
				es1 = 1;
			} else if (v1 == mv0) {
				es0 = 1;
				es1 = 0;
			} else if (v1 == mv1) {
				es0 = 1;
				es1 = 1;
			}

			value *va0 = es0 == 0 ? v1 : v0;
			value *va1 = es1 == 0 ? mv1 : mv0;

			/* Don't fold if no equal multipliers were found.
			 * Also don't fold if the operands of the to-be-created ADD are
			 * both relatively accessed with different AR values because that
			 * would create impossible code.
			 */
			if (es0 != -1 &&
					(!va0->is_rel() || !va1->is_rel() ||
							(va0->rel == va1->rel))) {

				alu_node *add = sh.create_alu();
				add->bc.set_op(ALU_OP2_ADD);

				add->dst.resize(1);
				add->src.resize(2);

				value *t = sh.create_temp_value();
				t->def = add;
				add->dst[0] = t;
				add->src[0] = va0;
				add->src[1] = va1;
				add->bc.src[0] = n.bc.src[!es0];
				add->bc.src[1] = md->bc.src[!es1];

				add->bc.src[1].neg ^= n.bc.src[2].neg ^
						(n.bc.src[es0].neg != md->bc.src[es1].neg);

				n.insert_before(add);
				vt.add_value(t);

				t = t->gvalue();

				if (es0 == 1) {
					n.src[0] = n.src[1];
					n.bc.src[0] = n.bc.src[1];
				}

				n.src[1] = t;
				n.bc.src[1].clear();

				n.src.resize(2);

				n.bc.set_op(op);
				return fold_alu_op2(n);
			}
		}
	}

	if (!isc0 && !isc1 && !isc2)
		return false;

	if (isc0 && isc1 && isc2) {
		switch (n.bc.op) {
		case ALU_OP3_MULADD_IEEE:
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;

		// TODO

		default:
			return false;
		}
	} else {
		if (isc0 && isc1) {
			switch (n.bc.op) {
			case ALU_OP3_MULADD:
			case ALU_OP3_MULADD_IEEE:
				dv = cv0.f * cv1.f;
				n.bc.set_op(ALU_OP2_ADD);
				n.src[0] = sh.get_const_value(dv);
				n.bc.src[0].clear();
				n.src[1] = n.src[2];
				n.bc.src[1] = n.bc.src[2];
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD) {
			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
				convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
				return fold_alu_op1(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

			if (isc1 && v0 == v2) {
				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
				n.src[1] = sh.get_const_value(cv1);
				n.bc.src[1].neg = 0;
				n.bc.src[1].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			} else if (isc0 && v1 == v2) {
				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
				n.src[0] = sh.get_const_value(cv0);
				n.bc.src[0].neg = 0;
				n.bc.src[0].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}

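// Negate a condition code: E <-> NE directly; !(a >= b) becomes (b > a) and
// !(a > b) becomes (b >= a), so GE/GT invert to GT/GE with swapped operands.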
unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
	unsigned ncc = 0;

	switch (cc) {
	case AF_CC_E: ncc = AF_CC_NE; break;
	case AF_CC_NE: ncc = AF_CC_E; break;
	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
	default:
		assert(!"unexpected condition code");
		break;
	}
	return ncc;
}

unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {

	if (int_dst && cmp_type == AF_FLOAT_CMP) {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_SETE_DX10;
		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
		}
	} else {

		switch (cmp_type) {
		case AF_FLOAT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE;
			case AF_CC_NE: return ALU_OP2_SETNE;
			case AF_CC_GT: return ALU_OP2_SETGT;
			case AF_CC_GE: return ALU_OP2_SETGE;
			}
			break;
		}
		case AF_INT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE_INT;
			case AF_CC_NE: return ALU_OP2_SETNE_INT;
			case AF_CC_GT: return ALU_OP2_SETGT_INT;
			case AF_CC_GE: return ALU_OP2_SETGE_INT;
			}
			break;
		}
		case AF_UINT_CMP: {
			switch (cc) {
			case AF_CC_E: return ALU_OP2_SETE_INT;
			case AF_CC_NE: return ALU_OP2_SETNE_INT;
			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
			}
			break;
		}
		}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
		}
		break;
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE;
		case AF_CC_NE: return ALU_OP2_KILLNE;
		case AF_CC_GT: return ALU_OP2_KILLGT;
		case AF_CC_GE: return ALU_OP2_KILLGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE_INT;
		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
		}
		break;
	}
	case AF_UINT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP2_KILLE_INT;
		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {

	switch (cmp_type) {
	case AF_FLOAT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP3_CNDE;
		case AF_CC_GT: return ALU_OP3_CNDGT;
		case AF_CC_GE: return ALU_OP3_CNDGE;
		}
		break;
	}
	case AF_INT_CMP: {
		switch (cc) {
		case AF_CC_E: return ALU_OP3_CNDE_INT;
		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
		}
		break;
	}
	}

	assert(!"unexpected cc&cmp_type combination");
	return ~0u;
}

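// Rewrite a PRED_SETcc (predicate/exec-mask producer) into the matching
// SETcc that writes an ordinary value. The condition is inverted (swapping
// operands where required for GE/GT) and the exec-mask/predicate update
// flags are cleared.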
void convert_predset_to_set(shader& sh, alu_node* a) {

	unsigned flags = a->bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;

	bool swap_args = false;

	cc = invert_setcc_condition(cc, swap_args);

	unsigned newop = get_setcc_op(cc, cmp_type, true);

	a->dst.resize(1);
	a->bc.set_op(newop);

	if (swap_args) {
		std::swap(a->src[0], a->src[1]);
		std::swap(a->bc.src[0], a->bc.src[1]);
	}

	a->bc.update_exec_mask = 0;
	a->bc.update_pred = 0;
}

} // namespace r600_sb