1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include <cmath>
28
29 #include "sb_shader.h"
30
31 namespace r600_sb {
32
get_select_value_for_em(shader & sh,value * em)33 value* get_select_value_for_em(shader& sh, value* em) {
34 if (!em->def)
35 return NULL;
36
37 node *predset = em->def;
38 if (!predset->is_pred_set())
39 return NULL;
40
41 alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42 convert_predset_to_set(sh, s);
43
44 predset->insert_after(s);
45
46 value* &d0 = s->dst[0];
47 d0 = sh.create_temp_value();
48 d0->def = s;
49 return d0;
50 }
51
convert_to_mov(alu_node & n,value * src,bool neg,bool abs)52 void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53 n.src.resize(1);
54 n.src[0] = src;
55 n.bc.src[0].abs = abs;
56 n.bc.src[0].neg = neg;
57 n.bc.set_op(ALU_OP1_MOV);
58 }
59
expr_handler(shader & sh)60 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61
get_const(const literal & l)62 value * expr_handler::get_const(const literal &l) {
63 value *v = sh.get_const_value(l);
64 if (!v->gvn_source)
65 vt.add_value(v);
66 return v;
67 }
68
assign_source(value * dst,value * src)69 void expr_handler::assign_source(value *dst, value *src) {
70 dst->gvn_source = src->gvn_source;
71 }
72
equal(value * l,value * r)73 bool expr_handler::equal(value *l, value *r) {
74
75 assert(l != r);
76
77 if (l->is_lds_access() || r->is_lds_access())
78 return false;
79 if (l->gvalue() == r->gvalue())
80 return true;
81
82 if (l->def && r->def)
83 return defs_equal(l, r);
84
85 if (l->is_rel() && r->is_rel())
86 return ivars_equal(l, r);
87
88 return false;
89 }
90
ivars_equal(value * l,value * r)91 bool expr_handler::ivars_equal(value* l, value* r) {
92 if (l->rel->gvalue() == r->rel->gvalue()
93 && l->select == r->select) {
94
95 vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
96 vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
97
98 // FIXME: replace this with more precise aliasing test
99 return lv == rv;
100 }
101 return false;
102 }
103
defs_equal(value * l,value * r)104 bool expr_handler::defs_equal(value* l, value* r) {
105
106 node *d1 = l->def;
107 node *d2 = r->def;
108
109 if (d1->type != d2->type || d1->subtype != d2->subtype)
110 return false;
111
112 if (d1->is_pred_set() || d2->is_pred_set())
113 return false;
114
115 if (d1->type == NT_OP) {
116 switch (d1->subtype) {
117 case NST_ALU_INST:
118 return ops_equal(
119 static_cast<alu_node*>(d1),
120 static_cast<alu_node*>(d2));
121 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
122 // static_cast<fetch_node*>(d2);
123 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
124 // static_cast<cf_node*>(d2);
125 default:
126 break;
127 }
128 }
129 return false;
130 }
131
try_fold(value * v)132 bool expr_handler::try_fold(value* v) {
133 assert(!v->gvn_source);
134
135 if (v->def)
136 try_fold(v->def);
137
138 if (v->gvn_source)
139 return true;
140
141 return false;
142 }
143
try_fold(node * n)144 bool expr_handler::try_fold(node* n) {
145 return n->fold_dispatch(this);
146 }
147
fold(node & n)148 bool expr_handler::fold(node& n) {
149 if (n.subtype == NST_PHI) {
150
151 value *s = n.src[0];
152
153 // FIXME disabling phi folding for registers for now, otherwise we lose
154 // control flow information in some cases
155 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
156 // probably control flow transformation is required to enable it
157 if (s->is_sgpr())
158 return false;
159
160 for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
161 value *v = *I;
162 if (!s->v_equal(v))
163 return false;
164 }
165
166 assign_source(n.dst[0], s);
167 } else {
168 assert(n.subtype == NST_PSI);
169 assert(n.src.size() >= 6);
170
171 value *s = n.src[2];
172 assert(s->gvn_source);
173
174 for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
175 value *v = *(I+2);
176 if (!s->v_equal(v))
177 return false;
178 }
179 assign_source(n.dst[0], s);
180 }
181 return true;
182 }
183
fold(container_node & n)184 bool expr_handler::fold(container_node& n) {
185 return false;
186 }
187
fold_setcc(alu_node & n)188 bool expr_handler::fold_setcc(alu_node &n) {
189
190 value* v0 = n.src[0]->gvalue();
191 value* v1 = n.src[1]->gvalue();
192
193 assert(v0 && v1 && n.dst[0]);
194
195 unsigned flags = n.bc.op_ptr->flags;
196 unsigned cc = flags & AF_CC_MASK;
197 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
198 unsigned dst_type = flags & AF_DST_TYPE_MASK;
199
200 bool cond_result;
201 bool have_result = false;
202
203 bool isc0 = v0->is_const();
204 bool isc1 = v1->is_const();
205
206 literal dv, cv0, cv1;
207
208 if (isc0) {
209 cv0 = v0->get_const_value();
210 apply_alu_src_mod(n.bc, 0, cv0);
211 }
212
213 if (isc1) {
214 cv1 = v1->get_const_value();
215 apply_alu_src_mod(n.bc, 1, cv1);
216 }
217
218 if (isc0 && isc1) {
219 cond_result = evaluate_condition(flags, cv0, cv1);
220 have_result = true;
221 } else if (isc1) {
222 if (cmp_type == AF_FLOAT_CMP) {
223 if (n.bc.src[0].abs && !n.bc.src[0].neg) {
224 if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
225 cond_result = true;
226 have_result = true;
227 } else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
228 cond_result = true;
229 have_result = true;
230 }
231 } else if (n.bc.src[0].abs && n.bc.src[0].neg) {
232 if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
233 cond_result = false;
234 have_result = true;
235 } else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
236 cond_result = false;
237 have_result = true;
238 }
239 }
240 } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
241 cond_result = true;
242 have_result = true;
243 }
244 } else if (isc0) {
245 if (cmp_type == AF_FLOAT_CMP) {
246 if (n.bc.src[1].abs && !n.bc.src[1].neg) {
247 if (cv0.f <= 0.0f && cc == AF_CC_GT) {
248 cond_result = false;
249 have_result = true;
250 } else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
251 cond_result = false;
252 have_result = true;
253 }
254 } else if (n.bc.src[1].abs && n.bc.src[1].neg) {
255 if (cv0.f >= 0.0f && cc == AF_CC_GE) {
256 cond_result = true;
257 have_result = true;
258 } else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
259 cond_result = true;
260 have_result = true;
261 }
262 }
263 } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
264 cond_result = false;
265 have_result = true;
266 }
267 } else if (v0 == v1) {
268 bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
269 if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
270 // NOTE can't handle float comparisons here because of NaNs
271 cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
272 have_result = true;
273 }
274 }
275
276 if (have_result) {
277 literal result;
278
279 if (cond_result)
280 result = dst_type != AF_FLOAT_DST ?
281 literal(0xFFFFFFFFu) : literal(1.0f);
282 else
283 result = literal(0);
284
285 convert_to_mov(n, sh.get_const_value(result));
286 return fold_alu_op1(n);
287 }
288
289 return false;
290 }
291
fold(alu_node & n)292 bool expr_handler::fold(alu_node& n) {
293
294 switch (n.bc.op_ptr->src_count) {
295 case 1: return fold_alu_op1(n);
296 case 2: return fold_alu_op2(n);
297 case 3: return fold_alu_op3(n);
298 default:
299 assert(0);
300 }
301 return false;
302 }
303
fold(fetch_node & n)304 bool expr_handler::fold(fetch_node& n) {
305
306 unsigned chan = 0;
307 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
308 value* &v = *I;
309 if (v) {
310 if (n.bc.dst_sel[chan] == SEL_0)
311 assign_source(*I, get_const(0.0f));
312 else if (n.bc.dst_sel[chan] == SEL_1)
313 assign_source(*I, get_const(1.0f));
314 }
315 ++chan;
316 }
317 return false;
318 }
319
fold(cf_node & n)320 bool expr_handler::fold(cf_node& n) {
321 return false;
322 }
323
apply_alu_src_mod(const bc_alu & bc,unsigned src,literal & v)324 void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
325 literal &v) {
326 const bc_alu_src &s = bc.src[src];
327
328 if (s.abs)
329 v = fabsf(v.f);
330 if (s.neg)
331 v = -v.f;
332 }
333
apply_alu_dst_mod(const bc_alu & bc,literal & v)334 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
335 const float omod_coeff[] = {2.0f, 4.0, 0.5f};
336
337 if (bc.omod)
338 v = v.f * omod_coeff[bc.omod - 1];
339 if (bc.clamp)
340 v = float_clamp(v.f);
341 }
342
args_equal(const vvec & l,const vvec & r)343 bool expr_handler::args_equal(const vvec &l, const vvec &r) {
344
345 assert(l.size() == r.size());
346
347 int s = l.size();
348
349 for (int k = 0; k < s; ++k) {
350 if (!l[k]->v_equal(r[k]))
351 return false;
352 }
353
354 return true;
355 }
356
ops_equal(const alu_node * l,const alu_node * r)357 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
358 const bc_alu &b0 = l->bc;
359 const bc_alu &b1 = r->bc;
360
361 if (b0.op != b1.op)
362 return false;
363
364 unsigned src_count = b0.op_ptr->src_count;
365
366 if (b0.index_mode != b1.index_mode)
367 return false;
368
369 if (b0.clamp != b1.clamp || b0.omod != b1.omod)
370 return false;
371
372 for (unsigned s = 0; s < src_count; ++s) {
373 const bc_alu_src &s0 = b0.src[s];
374 const bc_alu_src &s1 = b1.src[s];
375
376 if (s0.abs != s1.abs || s0.neg != s1.neg)
377 return false;
378 }
379 return args_equal(l->src, r->src);
380 }
381
fold_alu_op1(alu_node & n)382 bool expr_handler::fold_alu_op1(alu_node& n) {
383
384 assert(!n.src.empty());
385 if (n.src.empty())
386 return false;
387
388 /* don't fold LDS instructions */
389 if (n.bc.op_ptr->flags & AF_LDS)
390 return false;
391
392 value* v0 = n.src[0]->gvalue();
393
394 if (v0->is_lds_oq() || v0->is_lds_access())
395 return false;
396 assert(v0 && n.dst[0]);
397
398 if (!v0->is_const()) {
399 // handle (MOV -(MOV -x)) => (MOV x)
400 if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
401 && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
402 alu_node *sd = static_cast<alu_node*>(v0->def);
403 if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
404 sd->bc.src[0].neg) {
405 n.src[0] = sd->src[0];
406 n.bc.src[0].neg = 0;
407 v0 = n.src[0]->gvalue();
408 }
409 }
410
411 if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
412 n.bc.op == ALU_OP1_MOVA_GPR_INT)
413 && n.bc.clamp == 0 && n.bc.omod == 0
414 && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
415 n.src.size() == 1 /* RIM/SIM can be appended as additional values */
416 && n.dst[0]->no_reladdr_conflict_with(v0)) {
417 assign_source(n.dst[0], v0);
418 return true;
419 }
420 return false;
421 }
422
423 literal dv, cv = v0->get_const_value();
424 apply_alu_src_mod(n.bc, 0, cv);
425
426 switch (n.bc.op) {
427 case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
428 case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
429 case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
430 case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
431 case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
432 case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
433 case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
434 case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
435 case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
436 case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
437 case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
438 case ALU_OP1_LOG_CLAMPED:
439 case ALU_OP1_LOG_IEEE:
440 if (cv.f != 0.0f)
441 dv = log2f(cv.f);
442 else
443 // don't fold to NAN, let the GPU handle it for now
444 // (prevents degenerate LIT tests from failing)
445 return false;
446 break;
447 case ALU_OP1_MOV: dv = cv; break;
448 case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
449 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
450 // case ALU_OP1_MOVA_GPR_INT:
451 case ALU_OP1_NOT_INT: dv = ~cv.i; break;
452 case ALU_OP1_PRED_SET_INV:
453 dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
454 case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
455 case ALU_OP1_RECIPSQRT_CLAMPED:
456 case ALU_OP1_RECIPSQRT_FF:
457 case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
458 case ALU_OP1_RECIP_CLAMPED:
459 case ALU_OP1_RECIP_FF:
460 case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
461 // case ALU_OP1_RECIP_INT:
462 case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
463 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
464 case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
465 case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
466 case ALU_OP1_TRUNC: dv = truncf(cv.f); break;
467
468 default:
469 return false;
470 }
471
472 apply_alu_dst_mod(n.bc, dv);
473 assign_source(n.dst[0], get_const(dv));
474 return true;
475 }
476
fold_mul_add(alu_node * n)477 bool expr_handler::fold_mul_add(alu_node *n) {
478
479 bool ieee;
480 value* v0 = n->src[0]->gvalue();
481
482 alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
483 static_cast<alu_node*>(v0->def) : NULL;
484
485 if (d0) {
486 if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
487 ieee = true;
488 else if (d0->is_alu_op(ALU_OP2_MUL))
489 ieee = false;
490 else
491 return false;
492
493 if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
494 !n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
495 !d0->bc.clamp && !n->bc.omod &&
496 (!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
497 !n->src[1]->is_kcache())) {
498
499 bool mul_neg = n->bc.src[0].neg;
500
501 n->src.resize(3);
502 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
503 n->src[2] = n->src[1];
504 n->bc.src[2] = n->bc.src[1];
505 n->src[0] = d0->src[0];
506 n->bc.src[0] = d0->bc.src[0];
507 n->src[1] = d0->src[1];
508 n->bc.src[1] = d0->bc.src[1];
509
510 n->bc.src[0].neg ^= mul_neg;
511
512 fold_alu_op3(*n);
513 return true;
514 }
515 }
516
517 value* v1 = n->src[1]->gvalue();
518
519 alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
520 static_cast<alu_node*>(v1->def) : NULL;
521
522 if (d1) {
523 if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
524 ieee = true;
525 else if (d1->is_alu_op(ALU_OP2_MUL))
526 ieee = false;
527 else
528 return false;
529
530 if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
531 !n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
532 !d1->bc.clamp && !n->bc.omod &&
533 (!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
534 !n->src[0]->is_kcache())) {
535
536 bool mul_neg = n->bc.src[1].neg;
537
538 n->src.resize(3);
539 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
540 n->src[2] = n->src[0];
541 n->bc.src[2] = n->bc.src[0];
542 n->src[1] = d1->src[1];
543 n->bc.src[1] = d1->bc.src[1];
544 n->src[0] = d1->src[0];
545 n->bc.src[0] = d1->bc.src[0];
546
547 n->bc.src[1].neg ^= mul_neg;
548
549 fold_alu_op3(*n);
550 return true;
551 }
552 }
553
554 return false;
555 }
556
eval_const_op(unsigned op,literal & r,literal cv0,literal cv1)557 bool expr_handler::eval_const_op(unsigned op, literal &r,
558 literal cv0, literal cv1) {
559
560 switch (op) {
561 case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
562 case ALU_OP2_ADDC_UINT:
563 r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
564 case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
565 case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
566 case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
567 case ALU_OP2_BFM_INT:
568 r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
569 case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
570 case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
571 case ALU_OP2_MAX:
572 case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
573 case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
574 case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
575 case ALU_OP2_MIN:
576 case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
577 case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
578 case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
579 case ALU_OP2_MUL:
580 case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
581 case ALU_OP2_MULHI_INT:
582 r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
583 case ALU_OP2_MULHI_UINT:
584 r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
585 case ALU_OP2_MULLO_INT:
586 r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
587 case ALU_OP2_MULLO_UINT:
588 r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
589 case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
590 case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
591 case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
592
593 default:
594 return false;
595 }
596
597 return true;
598 }
599
600 // fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
fold_assoc(alu_node * n)601 bool expr_handler::fold_assoc(alu_node *n) {
602
603 alu_node *a = n;
604 literal cr;
605
606 int last_arg = -3;
607
608 unsigned op = n->bc.op;
609 bool allow_neg = false, cur_neg = false;
610 bool distribute_neg = false;
611
612 switch(op) {
613 case ALU_OP2_ADD:
614 distribute_neg = true;
615 allow_neg = true;
616 break;
617 case ALU_OP2_MUL:
618 case ALU_OP2_MUL_IEEE:
619 allow_neg = true;
620 break;
621 case ALU_OP3_MULADD:
622 allow_neg = true;
623 op = ALU_OP2_MUL;
624 break;
625 case ALU_OP3_MULADD_IEEE:
626 allow_neg = true;
627 op = ALU_OP2_MUL_IEEE;
628 break;
629 default:
630 if (n->bc.op_ptr->src_count != 2)
631 return false;
632 }
633
634 // check if we can evaluate the op
635 if (!eval_const_op(op, cr, literal(0), literal(0)))
636 return false;
637
638 while (true) {
639
640 value *v0 = a->src[0]->gvalue();
641 value *v1 = a->src[1]->gvalue();
642
643 last_arg = -2;
644
645 if (v1->is_const()) {
646 literal arg = v1->get_const_value();
647 apply_alu_src_mod(a->bc, 1, arg);
648 if (cur_neg && distribute_neg)
649 arg.f = -arg.f;
650
651 if (a == n)
652 cr = arg;
653 else
654 eval_const_op(op, cr, cr, arg);
655
656 if (v0->def) {
657 alu_node *d0 = static_cast<alu_node*>(v0->def);
658 if ((d0->is_alu_op(op) ||
659 (op == ALU_OP2_MUL_IEEE &&
660 d0->is_alu_op(ALU_OP2_MUL))) &&
661 !d0->bc.omod && !d0->bc.clamp &&
662 !a->bc.src[0].abs &&
663 (!a->bc.src[0].neg || allow_neg)) {
664 cur_neg ^= a->bc.src[0].neg;
665 a = d0;
666 continue;
667 }
668 }
669 last_arg = 0;
670
671 }
672
673 if (v0->is_const()) {
674 literal arg = v0->get_const_value();
675 apply_alu_src_mod(a->bc, 0, arg);
676 if (cur_neg && distribute_neg)
677 arg.f = -arg.f;
678
679 if (last_arg == 0) {
680 eval_const_op(op, cr, cr, arg);
681 last_arg = -1;
682 break;
683 }
684
685 if (a == n)
686 cr = arg;
687 else
688 eval_const_op(op, cr, cr, arg);
689
690 if (v1->def) {
691 alu_node *d1 = static_cast<alu_node*>(v1->def);
692 if ((d1->is_alu_op(op) ||
693 (op == ALU_OP2_MUL_IEEE &&
694 d1->is_alu_op(ALU_OP2_MUL))) &&
695 !d1->bc.omod && !d1->bc.clamp &&
696 !a->bc.src[1].abs &&
697 (!a->bc.src[1].neg || allow_neg)) {
698 cur_neg ^= a->bc.src[1].neg;
699 a = d1;
700 continue;
701 }
702 }
703
704 last_arg = 1;
705 }
706
707 break;
708 };
709
710 if (last_arg == -1) {
711 // result is const
712 apply_alu_dst_mod(n->bc, cr);
713
714 if (n->bc.op == op) {
715 convert_to_mov(*n, sh.get_const_value(cr));
716 fold_alu_op1(*n);
717 return true;
718 } else { // MULADD => ADD
719 n->src[0] = n->src[2];
720 n->bc.src[0] = n->bc.src[2];
721 n->src[1] = sh.get_const_value(cr);
722 n->bc.src[1].clear();
723
724 n->src.resize(2);
725 n->bc.set_op(ALU_OP2_ADD);
726 }
727 } else if (last_arg >= 0) {
728 n->src[0] = a->src[last_arg];
729 n->bc.src[0] = a->bc.src[last_arg];
730 n->bc.src[0].neg ^= cur_neg;
731 n->src[1] = sh.get_const_value(cr);
732 n->bc.src[1].clear();
733 }
734
735 return false;
736 }
737
fold_alu_op2(alu_node & n)738 bool expr_handler::fold_alu_op2(alu_node& n) {
739
740 if (n.src.size() < 2)
741 return false;
742
743 unsigned flags = n.bc.op_ptr->flags;
744
745 if (flags & AF_SET) {
746 return fold_setcc(n);
747 }
748
749 if (!sh.safe_math && (flags & AF_M_ASSOC)) {
750 if (fold_assoc(&n))
751 return true;
752 }
753
754 value* v0 = n.src[0]->gvalue();
755 value* v1 = n.src[1]->gvalue();
756
757 assert(v0 && v1);
758
759 // handle some operations with equal args, e.g. x + x => x * 2
760 if (v0 == v1) {
761 if (n.bc.src[0].neg == n.bc.src[1].neg &&
762 n.bc.src[0].abs == n.bc.src[1].abs) {
763 switch (n.bc.op) {
764 case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
765 case ALU_OP2_MIN_DX10:
766 case ALU_OP2_MAX:
767 case ALU_OP2_MAX_DX10:
768 convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
769 return fold_alu_op1(n);
770 case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
771 if (!sh.safe_math) {
772 n.src[1] = sh.get_const_value(2.0f);
773 n.bc.src[1].clear();
774 n.bc.set_op(ALU_OP2_MUL);
775 return fold_alu_op2(n);
776 }
777 break;
778 }
779 }
780 if (n.bc.src[0].neg != n.bc.src[1].neg &&
781 n.bc.src[0].abs == n.bc.src[1].abs) {
782 switch (n.bc.op) {
783 case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0)
784 if (!sh.safe_math) {
785 convert_to_mov(n, sh.get_const_value(literal(0)));
786 return fold_alu_op1(n);
787 }
788 break;
789 }
790 }
791 }
792
793 if (n.bc.op == ALU_OP2_ADD) {
794 if (fold_mul_add(&n))
795 return true;
796 }
797
798 bool isc0 = v0->is_const();
799 bool isc1 = v1->is_const();
800
801 if (!isc0 && !isc1)
802 return false;
803
804 literal dv, cv0, cv1;
805
806 if (isc0) {
807 cv0 = v0->get_const_value();
808 apply_alu_src_mod(n.bc, 0, cv0);
809 }
810
811 if (isc1) {
812 cv1 = v1->get_const_value();
813 apply_alu_src_mod(n.bc, 1, cv1);
814 }
815
816 if (isc0 && isc1) {
817
818 if (!eval_const_op(n.bc.op, dv, cv0, cv1))
819 return false;
820
821 } else { // one source is const
822
823 if (isc0 && cv0 == literal(0)) {
824 switch (n.bc.op) {
825 case ALU_OP2_ADD:
826 case ALU_OP2_ADD_INT:
827 case ALU_OP2_MAX_UINT:
828 case ALU_OP2_OR_INT:
829 case ALU_OP2_XOR_INT:
830 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
831 return fold_alu_op1(n);
832 case ALU_OP2_AND_INT:
833 case ALU_OP2_ASHR_INT:
834 case ALU_OP2_LSHL_INT:
835 case ALU_OP2_LSHR_INT:
836 case ALU_OP2_MIN_UINT:
837 case ALU_OP2_MUL:
838 case ALU_OP2_MULHI_UINT:
839 case ALU_OP2_MULLO_UINT:
840 convert_to_mov(n, sh.get_const_value(literal(0)));
841 return fold_alu_op1(n);
842 }
843 } else if (isc1 && cv1 == literal(0)) {
844 switch (n.bc.op) {
845 case ALU_OP2_ADD:
846 case ALU_OP2_ADD_INT:
847 case ALU_OP2_ASHR_INT:
848 case ALU_OP2_LSHL_INT:
849 case ALU_OP2_LSHR_INT:
850 case ALU_OP2_MAX_UINT:
851 case ALU_OP2_OR_INT:
852 case ALU_OP2_SUB_INT:
853 case ALU_OP2_XOR_INT:
854 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
855 return fold_alu_op1(n);
856 case ALU_OP2_AND_INT:
857 case ALU_OP2_MIN_UINT:
858 case ALU_OP2_MUL:
859 case ALU_OP2_MULHI_UINT:
860 case ALU_OP2_MULLO_UINT:
861 convert_to_mov(n, sh.get_const_value(literal(0)));
862 return fold_alu_op1(n);
863 }
864 } else if (isc0 && cv0 == literal(1.0f)) {
865 switch (n.bc.op) {
866 case ALU_OP2_MUL:
867 case ALU_OP2_MUL_IEEE:
868 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
869 return fold_alu_op1(n);
870 }
871 } else if (isc1 && cv1 == literal(1.0f)) {
872 switch (n.bc.op) {
873 case ALU_OP2_MUL:
874 case ALU_OP2_MUL_IEEE:
875 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
876 return fold_alu_op1(n);
877 }
878 }
879
880 return false;
881 }
882
883 apply_alu_dst_mod(n.bc, dv);
884 assign_source(n.dst[0], get_const(dv));
885 return true;
886 }
887
evaluate_condition(unsigned alu_cnd_flags,literal s1,literal s2)888 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
889 literal s1, literal s2) {
890
891 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
892 unsigned cc = alu_cnd_flags & AF_CC_MASK;
893
894 switch (cmp_type) {
895 case AF_FLOAT_CMP: {
896 switch (cc) {
897 case AF_CC_E : return s1.f == s2.f;
898 case AF_CC_GT: return s1.f > s2.f;
899 case AF_CC_GE: return s1.f >= s2.f;
900 case AF_CC_NE: return s1.f != s2.f;
901 case AF_CC_LT: return s1.f < s2.f;
902 case AF_CC_LE: return s1.f <= s2.f;
903 default:
904 assert(!"invalid condition code");
905 return false;
906 }
907 }
908 case AF_INT_CMP: {
909 switch (cc) {
910 case AF_CC_E : return s1.i == s2.i;
911 case AF_CC_GT: return s1.i > s2.i;
912 case AF_CC_GE: return s1.i >= s2.i;
913 case AF_CC_NE: return s1.i != s2.i;
914 case AF_CC_LT: return s1.i < s2.i;
915 case AF_CC_LE: return s1.i <= s2.i;
916 default:
917 assert(!"invalid condition code");
918 return false;
919 }
920 }
921 case AF_UINT_CMP: {
922 switch (cc) {
923 case AF_CC_E : return s1.u == s2.u;
924 case AF_CC_GT: return s1.u > s2.u;
925 case AF_CC_GE: return s1.u >= s2.u;
926 case AF_CC_NE: return s1.u != s2.u;
927 case AF_CC_LT: return s1.u < s2.u;
928 case AF_CC_LE: return s1.u <= s2.u;
929 default:
930 assert(!"invalid condition code");
931 return false;
932 }
933 }
934 default:
935 assert(!"invalid cmp_type");
936 return false;
937 }
938 }
939
fold_alu_op3(alu_node & n)940 bool expr_handler::fold_alu_op3(alu_node& n) {
941
942 if (n.src.size() < 3)
943 return false;
944
945 if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
946 if (fold_assoc(&n))
947 return true;
948 if (n.src.size() < 3)
949 return fold_alu_op2(n);
950 }
951
952 value* v0 = n.src[0]->gvalue();
953 value* v1 = n.src[1]->gvalue();
954 value* v2 = n.src[2]->gvalue();
955
956 /* LDS instructions look like op3 with no dst - don't fold. */
957 if (!n.dst[0])
958 return false;
959 assert(v0 && v1 && v2 && n.dst[0]);
960
961 bool isc0 = v0->is_const();
962 bool isc1 = v1->is_const();
963 bool isc2 = v2->is_const();
964
965 literal dv, cv0, cv1, cv2;
966
967 if (isc0) {
968 cv0 = v0->get_const_value();
969 apply_alu_src_mod(n.bc, 0, cv0);
970 }
971
972 if (isc1) {
973 cv1 = v1->get_const_value();
974 apply_alu_src_mod(n.bc, 1, cv1);
975 }
976
977 if (isc2) {
978 cv2 = v2->get_const_value();
979 apply_alu_src_mod(n.bc, 2, cv2);
980 }
981
982 unsigned flags = n.bc.op_ptr->flags;
983
984 if (flags & AF_CMOV) {
985 int src = 0;
986
987 if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
988 // result doesn't depend on condition, convert to MOV
989 src = 1;
990 } else if (isc0) {
991 // src0 is const, condition can be evaluated, convert to MOV
992 bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
993 AF_CMP_TYPE_MASK), cv0, literal(0));
994 src = cond ? 1 : 2;
995 }
996
997 if (src) {
998 // if src is selected, convert to MOV
999 convert_to_mov(n, n.src[src], n.bc.src[src].neg);
1000 return fold_alu_op1(n);
1001 }
1002 }
1003
1004 // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
1005 if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
1006 n.bc.op == ALU_OP3_MULADD_IEEE)) {
1007
1008 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1009 ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1010
1011 if (!isc2 && v2->def && v2->def->is_alu_op(op)) {
1012
1013 alu_node *md = static_cast<alu_node*>(v2->def);
1014 value *mv0 = md->src[0]->gvalue();
1015 value *mv1 = md->src[1]->gvalue();
1016
1017 int es0 = -1, es1;
1018
1019 if (v0 == mv0) {
1020 es0 = 0;
1021 es1 = 0;
1022 } else if (v0 == mv1) {
1023 es0 = 0;
1024 es1 = 1;
1025 } else if (v1 == mv0) {
1026 es0 = 1;
1027 es1 = 0;
1028 } else if (v1 == mv1) {
1029 es0 = 1;
1030 es1 = 1;
1031 }
1032
1033 value *va0 = es0 == 0 ? v1 : v0;
1034 value *va1 = es1 == 0 ? mv1 : mv0;
1035
1036 /* Don't fold if no equal multipliers were found.
1037 * Also don#t fold if the operands of the to be created ADD are both
1038 * relatively accessed with different AR values because that would
1039 * create impossible code.
1040 */
1041 if (es0 != -1 &&
1042 (!va0->is_rel() || !va1->is_rel() ||
1043 (va0->rel == va1->rel))) {
1044
1045 alu_node *add = sh.create_alu();
1046 add->bc.set_op(ALU_OP2_ADD);
1047
1048 add->dst.resize(1);
1049 add->src.resize(2);
1050
1051 value *t = sh.create_temp_value();
1052 t->def = add;
1053 add->dst[0] = t;
1054 add->src[0] = va0;
1055 add->src[1] = va1;
1056 add->bc.src[0] = n.bc.src[!es0];
1057 add->bc.src[1] = md->bc.src[!es1];
1058
1059 add->bc.src[1].neg ^= n.bc.src[2].neg ^
1060 (n.bc.src[es0].neg != md->bc.src[es1].neg);
1061
1062 n.insert_before(add);
1063 vt.add_value(t);
1064
1065 t = t->gvalue();
1066
1067 if (es0 == 1) {
1068 n.src[0] = n.src[1];
1069 n.bc.src[0] = n.bc.src[1];
1070 }
1071
1072 n.src[1] = t;
1073 n.bc.src[1].clear();
1074
1075 n.src.resize(2);
1076
1077 n.bc.set_op(op);
1078 return fold_alu_op2(n);
1079 }
1080 }
1081 }
1082
1083 if (!isc0 && !isc1 && !isc2)
1084 return false;
1085
1086 if (isc0 && isc1 && isc2) {
1087 switch (n.bc.op) {
1088 case ALU_OP3_MULADD_IEEE:
1089 case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
1090
1091 // TODO
1092
1093 default:
1094 return false;
1095 }
1096 } else {
1097 if (isc0 && isc1) {
1098 switch (n.bc.op) {
1099 case ALU_OP3_MULADD:
1100 case ALU_OP3_MULADD_IEEE:
1101 dv = cv0.f * cv1.f;
1102 n.bc.set_op(ALU_OP2_ADD);
1103 n.src[0] = sh.get_const_value(dv);
1104 n.bc.src[0].clear();
1105 n.src[1] = n.src[2];
1106 n.bc.src[1] = n.bc.src[2];
1107 n.src.resize(2);
1108 return fold_alu_op2(n);
1109 }
1110 }
1111
1112 if (n.bc.op == ALU_OP3_MULADD) {
1113 if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
1114 convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
1115 return fold_alu_op1(n);
1116 }
1117 }
1118
1119 if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
1120 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1121 ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1122
1123 if (isc1 && v0 == v2) {
1124 cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
1125 n.src[1] = sh.get_const_value(cv1);
1126 n.bc.src[1].neg = 0;
1127 n.bc.src[1].abs = 0;
1128 n.bc.set_op(op);
1129 n.src.resize(2);
1130 return fold_alu_op2(n);
1131 } else if (isc0 && v1 == v2) {
1132 cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
1133 n.src[0] = sh.get_const_value(cv0);
1134 n.bc.src[0].neg = 0;
1135 n.bc.src[0].abs = 0;
1136 n.bc.set_op(op);
1137 n.src.resize(2);
1138 return fold_alu_op2(n);
1139 }
1140 }
1141
1142 return false;
1143 }
1144
1145 apply_alu_dst_mod(n.bc, dv);
1146 assign_source(n.dst[0], get_const(dv));
1147 return true;
1148 }
1149
invert_setcc_condition(unsigned cc,bool & swap_args)1150 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
1151 unsigned ncc = 0;
1152
1153 switch (cc) {
1154 case AF_CC_E: ncc = AF_CC_NE; break;
1155 case AF_CC_NE: ncc = AF_CC_E; break;
1156 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
1157 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
1158 default:
1159 assert(!"unexpected condition code");
1160 break;
1161 }
1162 return ncc;
1163 }
1164
get_setcc_op(unsigned cc,unsigned cmp_type,bool int_dst)1165 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
1166
1167 if (int_dst && cmp_type == AF_FLOAT_CMP) {
1168 switch (cc) {
1169 case AF_CC_E: return ALU_OP2_SETE_DX10;
1170 case AF_CC_NE: return ALU_OP2_SETNE_DX10;
1171 case AF_CC_GT: return ALU_OP2_SETGT_DX10;
1172 case AF_CC_GE: return ALU_OP2_SETGE_DX10;
1173 }
1174 } else {
1175
1176 switch(cmp_type) {
1177 case AF_FLOAT_CMP: {
1178 switch (cc) {
1179 case AF_CC_E: return ALU_OP2_SETE;
1180 case AF_CC_NE: return ALU_OP2_SETNE;
1181 case AF_CC_GT: return ALU_OP2_SETGT;
1182 case AF_CC_GE: return ALU_OP2_SETGE;
1183 }
1184 break;
1185 }
1186 case AF_INT_CMP: {
1187 switch (cc) {
1188 case AF_CC_E: return ALU_OP2_SETE_INT;
1189 case AF_CC_NE: return ALU_OP2_SETNE_INT;
1190 case AF_CC_GT: return ALU_OP2_SETGT_INT;
1191 case AF_CC_GE: return ALU_OP2_SETGE_INT;
1192 }
1193 break;
1194 }
1195 case AF_UINT_CMP: {
1196 switch (cc) {
1197 case AF_CC_E: return ALU_OP2_SETE_INT;
1198 case AF_CC_NE: return ALU_OP2_SETNE_INT;
1199 case AF_CC_GT: return ALU_OP2_SETGT_UINT;
1200 case AF_CC_GE: return ALU_OP2_SETGE_UINT;
1201 }
1202 break;
1203 }
1204 }
1205 }
1206
1207 assert(!"unexpected cc&cmp_type combination");
1208 return ~0u;
1209 }
1210
get_predsetcc_op(unsigned cc,unsigned cmp_type)1211 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
1212
1213 switch(cmp_type) {
1214 case AF_FLOAT_CMP: {
1215 switch (cc) {
1216 case AF_CC_E: return ALU_OP2_PRED_SETE;
1217 case AF_CC_NE: return ALU_OP2_PRED_SETNE;
1218 case AF_CC_GT: return ALU_OP2_PRED_SETGT;
1219 case AF_CC_GE: return ALU_OP2_PRED_SETGE;
1220 }
1221 break;
1222 }
1223 case AF_INT_CMP: {
1224 switch (cc) {
1225 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1226 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1227 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
1228 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
1229 }
1230 break;
1231 }
1232 case AF_UINT_CMP: {
1233 switch (cc) {
1234 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1235 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1236 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
1237 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
1238 }
1239 break;
1240 }
1241 }
1242
1243 assert(!"unexpected cc&cmp_type combination");
1244 return ~0u;
1245 }
1246
get_killcc_op(unsigned cc,unsigned cmp_type)1247 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
1248
1249 switch(cmp_type) {
1250 case AF_FLOAT_CMP: {
1251 switch (cc) {
1252 case AF_CC_E: return ALU_OP2_KILLE;
1253 case AF_CC_NE: return ALU_OP2_KILLNE;
1254 case AF_CC_GT: return ALU_OP2_KILLGT;
1255 case AF_CC_GE: return ALU_OP2_KILLGE;
1256 }
1257 break;
1258 }
1259 case AF_INT_CMP: {
1260 switch (cc) {
1261 case AF_CC_E: return ALU_OP2_KILLE_INT;
1262 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1263 case AF_CC_GT: return ALU_OP2_KILLGT_INT;
1264 case AF_CC_GE: return ALU_OP2_KILLGE_INT;
1265 }
1266 break;
1267 }
1268 case AF_UINT_CMP: {
1269 switch (cc) {
1270 case AF_CC_E: return ALU_OP2_KILLE_INT;
1271 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1272 case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
1273 case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
1274 }
1275 break;
1276 }
1277 }
1278
1279 assert(!"unexpected cc&cmp_type combination");
1280 return ~0u;
1281 }
1282
get_cndcc_op(unsigned cc,unsigned cmp_type)1283 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
1284
1285 switch(cmp_type) {
1286 case AF_FLOAT_CMP: {
1287 switch (cc) {
1288 case AF_CC_E: return ALU_OP3_CNDE;
1289 case AF_CC_GT: return ALU_OP3_CNDGT;
1290 case AF_CC_GE: return ALU_OP3_CNDGE;
1291 }
1292 break;
1293 }
1294 case AF_INT_CMP: {
1295 switch (cc) {
1296 case AF_CC_E: return ALU_OP3_CNDE_INT;
1297 case AF_CC_GT: return ALU_OP3_CNDGT_INT;
1298 case AF_CC_GE: return ALU_OP3_CNDGE_INT;
1299 }
1300 break;
1301 }
1302 }
1303
1304 assert(!"unexpected cc&cmp_type combination");
1305 return ~0u;
1306 }
1307
1308
convert_predset_to_set(shader & sh,alu_node * a)1309 void convert_predset_to_set(shader& sh, alu_node* a) {
1310
1311 unsigned flags = a->bc.op_ptr->flags;
1312 unsigned cc = flags & AF_CC_MASK;
1313 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
1314
1315 bool swap_args = false;
1316
1317 cc = invert_setcc_condition(cc, swap_args);
1318
1319 unsigned newop = get_setcc_op(cc, cmp_type, true);
1320
1321 a->dst.resize(1);
1322 a->bc.set_op(newop);
1323
1324 if (swap_args) {
1325 std::swap(a->src[0], a->src[1]);
1326 std::swap(a->bc.src[0], a->bc.src[1]);
1327 }
1328
1329 a->bc.update_exec_mask = 0;
1330 a->bc.update_pred = 0;
1331 }
1332
1333 } // namespace r600_sb
1334