1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #include <cmath> 28 29 #include "sb_shader.h" 30 31 namespace r600_sb { 32 get_select_value_for_em(shader & sh,value * em)33 value* get_select_value_for_em(shader& sh, value* em) { 34 if (!em->def) 35 return NULL; 36 37 node *predset = em->def; 38 if (!predset->is_pred_set()) 39 return NULL; 40 41 alu_node *s = sh.clone(static_cast<alu_node*>(predset)); 42 convert_predset_to_set(sh, s); 43 44 predset->insert_after(s); 45 46 value* &d0 = s->dst[0]; 47 d0 = sh.create_temp_value(); 48 d0->def = s; 49 return d0; 50 } 51 convert_to_mov(alu_node & n,value * src,bool neg,bool abs)52 void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) { 53 n.src.resize(1); 54 n.src[0] = src; 55 n.bc.src[0].abs = abs; 56 n.bc.src[0].neg = neg; 57 n.bc.set_op(ALU_OP1_MOV); 58 } 59 expr_handler(shader & sh)60 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {} 61 get_const(const literal & l)62 value * expr_handler::get_const(const literal &l) { 63 value *v = sh.get_const_value(l); 64 if (!v->gvn_source) 65 vt.add_value(v); 66 return v; 67 } 68 assign_source(value * dst,value * src)69 void expr_handler::assign_source(value *dst, value *src) { 70 dst->gvn_source = src->gvn_source; 71 } 72 equal(value * l,value * r)73 bool expr_handler::equal(value *l, value *r) { 74 75 assert(l != r); 76 77 if (l->gvalue() == r->gvalue()) 78 return true; 79 80 if (l->def && r->def) 81 return defs_equal(l, r); 82 83 if (l->is_rel() && r->is_rel()) 84 return ivars_equal(l, r); 85 86 return false; 87 } 88 ivars_equal(value * l,value * r)89 bool expr_handler::ivars_equal(value* l, value* r) { 90 if (l->rel->gvalue() == r->rel->gvalue() 91 && l->select == r->select) { 92 93 vvec &lv = l->mdef.empty() ? l->muse : l->mdef; 94 vvec &rv = r->mdef.empty() ? r->muse : r->mdef; 95 96 // FIXME: replace this with more precise aliasing test 97 return lv == rv; 98 } 99 return false; 100 } 101 defs_equal(value * l,value * r)102 bool expr_handler::defs_equal(value* l, value* r) { 103 104 node *d1 = l->def; 105 node *d2 = r->def; 106 107 if (d1->type != d2->type || d1->subtype != d2->subtype) 108 return false; 109 110 if (d1->is_pred_set() || d2->is_pred_set()) 111 return false; 112 113 if (d1->type == NT_OP) { 114 switch (d1->subtype) { 115 case NST_ALU_INST: 116 return ops_equal( 117 static_cast<alu_node*>(d1), 118 static_cast<alu_node*>(d2)); 119 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1), 120 // static_cast<fetch_node*>(d2); 121 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1), 122 // static_cast<cf_node*>(d2); 123 default: 124 break; 125 } 126 } 127 return false; 128 } 129 try_fold(value * v)130 bool expr_handler::try_fold(value* v) { 131 assert(!v->gvn_source); 132 133 if (v->def) 134 try_fold(v->def); 135 136 if (v->gvn_source) 137 return true; 138 139 return false; 140 } 141 try_fold(node * n)142 bool expr_handler::try_fold(node* n) { 143 return n->fold_dispatch(this); 144 } 145 fold(node & n)146 bool expr_handler::fold(node& n) { 147 if (n.subtype == NST_PHI) { 148 149 value *s = n.src[0]; 150 151 // FIXME disabling phi folding for registers for now, otherwise we lose 152 // control flow information in some cases 153 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test) 154 // probably control flow transformation is required to enable it 155 if (s->is_sgpr()) 156 return false; 157 158 for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) { 159 value *v = *I; 160 if (!s->v_equal(v)) 161 return false; 162 } 163 164 assign_source(n.dst[0], s); 165 } else { 166 assert(n.subtype == NST_PSI); 167 assert(n.src.size() >= 6); 168 169 value *s = n.src[2]; 170 assert(s->gvn_source); 171 172 for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) { 173 value *v = *(I+2); 174 if (!s->v_equal(v)) 175 return false; 176 } 177 assign_source(n.dst[0], s); 178 } 179 return true; 180 } 181 fold(container_node & n)182 bool expr_handler::fold(container_node& n) { 183 return false; 184 } 185 fold_setcc(alu_node & n)186 bool expr_handler::fold_setcc(alu_node &n) { 187 188 value* v0 = n.src[0]->gvalue(); 189 value* v1 = n.src[1]->gvalue(); 190 191 assert(v0 && v1 && n.dst[0]); 192 193 unsigned flags = n.bc.op_ptr->flags; 194 unsigned cc = flags & AF_CC_MASK; 195 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 196 unsigned dst_type = flags & AF_DST_TYPE_MASK; 197 198 bool cond_result; 199 bool have_result = false; 200 201 bool isc0 = v0->is_const(); 202 bool isc1 = v1->is_const(); 203 204 literal dv, cv0, cv1; 205 206 if (isc0) { 207 cv0 = v0->get_const_value(); 208 apply_alu_src_mod(n.bc, 0, cv0); 209 } 210 211 if (isc1) { 212 cv1 = v1->get_const_value(); 213 apply_alu_src_mod(n.bc, 1, cv1); 214 } 215 216 if (isc0 && isc1) { 217 cond_result = evaluate_condition(flags, cv0, cv1); 218 have_result = true; 219 } else if (isc1) { 220 if (cmp_type == AF_FLOAT_CMP) { 221 if (n.bc.src[0].abs && !n.bc.src[0].neg) { 222 if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) { 223 cond_result = true; 224 have_result = true; 225 } else if (cv1.f <= 0.0f && cc == AF_CC_GE) { 226 cond_result = true; 227 have_result = true; 228 } 229 } else if (n.bc.src[0].abs && n.bc.src[0].neg) { 230 if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) { 231 cond_result = false; 232 have_result = true; 233 } else if (cv1.f >= 0.0f && cc == AF_CC_GT) { 234 cond_result = false; 235 have_result = true; 236 } 237 } 238 } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) { 239 cond_result = true; 240 have_result = true; 241 } 242 } else if (isc0) { 243 if (cmp_type == AF_FLOAT_CMP) { 244 if (n.bc.src[1].abs && !n.bc.src[1].neg) { 245 if (cv0.f <= 0.0f && cc == AF_CC_GT) { 246 cond_result = false; 247 have_result = true; 248 } else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) { 249 cond_result = false; 250 have_result = true; 251 } 252 } else if (n.bc.src[1].abs && n.bc.src[1].neg) { 253 if (cv0.f >= 0.0f && cc == AF_CC_GE) { 254 cond_result = true; 255 have_result = true; 256 } else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) { 257 cond_result = true; 258 have_result = true; 259 } 260 } 261 } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) { 262 cond_result = false; 263 have_result = true; 264 } 265 } else if (v0 == v1) { 266 bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1]; 267 if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) { 268 // NOTE can't handle float comparisons here because of NaNs 269 cond_result = (cc == AF_CC_E || cc == AF_CC_GE); 270 have_result = true; 271 } 272 } 273 274 if (have_result) { 275 literal result; 276 277 if (cond_result) 278 result = dst_type != AF_FLOAT_DST ? 279 literal(0xFFFFFFFFu) : literal(1.0f); 280 else 281 result = literal(0); 282 283 convert_to_mov(n, sh.get_const_value(result)); 284 return fold_alu_op1(n); 285 } 286 287 return false; 288 } 289 fold(alu_node & n)290 bool expr_handler::fold(alu_node& n) { 291 292 switch (n.bc.op_ptr->src_count) { 293 case 1: return fold_alu_op1(n); 294 case 2: return fold_alu_op2(n); 295 case 3: return fold_alu_op3(n); 296 default: 297 assert(0); 298 } 299 return false; 300 } 301 fold(fetch_node & n)302 bool expr_handler::fold(fetch_node& n) { 303 304 unsigned chan = 0; 305 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) { 306 value* &v = *I; 307 if (v) { 308 if (n.bc.dst_sel[chan] == SEL_0) 309 assign_source(*I, get_const(0.0f)); 310 else if (n.bc.dst_sel[chan] == SEL_1) 311 assign_source(*I, get_const(1.0f)); 312 } 313 ++chan; 314 } 315 return false; 316 } 317 fold(cf_node & n)318 bool expr_handler::fold(cf_node& n) { 319 return false; 320 } 321 apply_alu_src_mod(const bc_alu & bc,unsigned src,literal & v)322 void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src, 323 literal &v) { 324 const bc_alu_src &s = bc.src[src]; 325 326 if (s.abs) 327 v = fabs(v.f); 328 if (s.neg) 329 v = -v.f; 330 } 331 apply_alu_dst_mod(const bc_alu & bc,literal & v)332 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) { 333 float omod_coeff[] = {2.0f, 4.0, 0.5f}; 334 335 if (bc.omod) 336 v = v.f * omod_coeff[bc.omod - 1]; 337 if (bc.clamp) 338 v = float_clamp(v.f); 339 } 340 args_equal(const vvec & l,const vvec & r)341 bool expr_handler::args_equal(const vvec &l, const vvec &r) { 342 343 assert(l.size() == r.size()); 344 345 int s = l.size(); 346 347 for (int k = 0; k < s; ++k) { 348 if (!l[k]->v_equal(r[k])) 349 return false; 350 } 351 352 return true; 353 } 354 ops_equal(const alu_node * l,const alu_node * r)355 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) { 356 const bc_alu &b0 = l->bc; 357 const bc_alu &b1 = r->bc; 358 359 if (b0.op != b1.op) 360 return false; 361 362 unsigned src_count = b0.op_ptr->src_count; 363 364 if (b0.index_mode != b1.index_mode) 365 return false; 366 367 if (b0.clamp != b1.clamp || b0.omod != b1.omod) 368 return false; 369 370 for (unsigned s = 0; s < src_count; ++s) { 371 const bc_alu_src &s0 = b0.src[s]; 372 const bc_alu_src &s1 = b1.src[s]; 373 374 if (s0.abs != s1.abs || s0.neg != s1.neg) 375 return false; 376 } 377 return args_equal(l->src, r->src); 378 } 379 fold_alu_op1(alu_node & n)380 bool expr_handler::fold_alu_op1(alu_node& n) { 381 382 assert(!n.src.empty()); 383 if (n.src.empty()) 384 return false; 385 386 value* v0 = n.src[0]->gvalue(); 387 388 assert(v0 && n.dst[0]); 389 390 if (!v0->is_const()) { 391 // handle (MOV -(MOV -x)) => (MOV x) 392 if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs 393 && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) { 394 alu_node *sd = static_cast<alu_node*>(v0->def); 395 if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs && 396 sd->bc.src[0].neg) { 397 n.src[0] = sd->src[0]; 398 n.bc.src[0].neg = 0; 399 v0 = n.src[0]->gvalue(); 400 } 401 } 402 403 if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT || 404 n.bc.op == ALU_OP1_MOVA_GPR_INT) 405 && n.bc.clamp == 0 && n.bc.omod == 0 406 && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 && 407 n.src.size() == 1 /* RIM/SIM can be appended as additional values */) { 408 assign_source(n.dst[0], v0); 409 return true; 410 } 411 return false; 412 } 413 414 literal dv, cv = v0->get_const_value(); 415 apply_alu_src_mod(n.bc, 0, cv); 416 417 switch (n.bc.op) { 418 case ALU_OP1_CEIL: dv = ceil(cv.f); break; 419 case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break; 420 case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break; 421 case ALU_OP1_FLOOR: dv = floor(cv.f); break; 422 case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ???? 423 case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break; 424 case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break; 425 case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break; 426 case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break; 427 case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break; 428 case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break; 429 case ALU_OP1_LOG_CLAMPED: 430 case ALU_OP1_LOG_IEEE: 431 if (cv.f != 0.0f) 432 dv = log2(cv.f); 433 else 434 // don't fold to NAN, let the GPU handle it for now 435 // (prevents degenerate LIT tests from failing) 436 return false; 437 break; 438 case ALU_OP1_MOV: dv = cv; break; 439 case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ??? 440 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break; 441 // case ALU_OP1_MOVA_GPR_INT: 442 case ALU_OP1_NOT_INT: dv = ~cv.i; break; 443 case ALU_OP1_PRED_SET_INV: 444 dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break; 445 case ALU_OP1_PRED_SET_RESTORE: dv = cv; break; 446 case ALU_OP1_RECIPSQRT_CLAMPED: 447 case ALU_OP1_RECIPSQRT_FF: 448 case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break; 449 case ALU_OP1_RECIP_CLAMPED: 450 case ALU_OP1_RECIP_FF: 451 case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break; 452 // case ALU_OP1_RECIP_INT: 453 case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break; 454 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break; 455 case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break; 456 case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break; 457 case ALU_OP1_TRUNC: dv = trunc(cv.f); break; 458 459 default: 460 return false; 461 } 462 463 apply_alu_dst_mod(n.bc, dv); 464 assign_source(n.dst[0], get_const(dv)); 465 return true; 466 } 467 fold_mul_add(alu_node * n)468 bool expr_handler::fold_mul_add(alu_node *n) { 469 470 bool ieee; 471 value* v0 = n->src[0]->gvalue(); 472 473 alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ? 474 static_cast<alu_node*>(v0->def) : NULL; 475 476 if (d0) { 477 if (d0->is_alu_op(ALU_OP2_MUL_IEEE)) 478 ieee = true; 479 else if (d0->is_alu_op(ALU_OP2_MUL)) 480 ieee = false; 481 else 482 return false; 483 484 if (!d0->bc.src[0].abs && !d0->bc.src[1].abs && 485 !n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod && 486 !d0->bc.clamp && !n->bc.omod && 487 (!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() || 488 !n->src[1]->is_kcache())) { 489 490 bool mul_neg = n->bc.src[0].neg; 491 492 n->src.resize(3); 493 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); 494 n->src[2] = n->src[1]; 495 n->bc.src[2] = n->bc.src[1]; 496 n->src[0] = d0->src[0]; 497 n->bc.src[0] = d0->bc.src[0]; 498 n->src[1] = d0->src[1]; 499 n->bc.src[1] = d0->bc.src[1]; 500 501 n->bc.src[0].neg ^= mul_neg; 502 503 fold_alu_op3(*n); 504 return true; 505 } 506 } 507 508 value* v1 = n->src[1]->gvalue(); 509 510 alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ? 511 static_cast<alu_node*>(v1->def) : NULL; 512 513 if (d1) { 514 if (d1->is_alu_op(ALU_OP2_MUL_IEEE)) 515 ieee = true; 516 else if (d1->is_alu_op(ALU_OP2_MUL)) 517 ieee = false; 518 else 519 return false; 520 521 if (!d1->bc.src[1].abs && !d1->bc.src[0].abs && 522 !n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod && 523 !d1->bc.clamp && !n->bc.omod && 524 (!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() || 525 !n->src[0]->is_kcache())) { 526 527 bool mul_neg = n->bc.src[1].neg; 528 529 n->src.resize(3); 530 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); 531 n->src[2] = n->src[0]; 532 n->bc.src[2] = n->bc.src[0]; 533 n->src[1] = d1->src[1]; 534 n->bc.src[1] = d1->bc.src[1]; 535 n->src[0] = d1->src[0]; 536 n->bc.src[0] = d1->bc.src[0]; 537 538 n->bc.src[1].neg ^= mul_neg; 539 540 fold_alu_op3(*n); 541 return true; 542 } 543 } 544 545 return false; 546 } 547 eval_const_op(unsigned op,literal & r,literal cv0,literal cv1)548 bool expr_handler::eval_const_op(unsigned op, literal &r, 549 literal cv0, literal cv1) { 550 551 switch (op) { 552 case ALU_OP2_ADD: r = cv0.f + cv1.f; break; 553 case ALU_OP2_ADDC_UINT: 554 r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break; 555 case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break; 556 case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break; 557 case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break; 558 case ALU_OP2_BFM_INT: 559 r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break; 560 case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break; 561 case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break; 562 case ALU_OP2_MAX: 563 case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break; 564 case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break; 565 case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break; 566 case ALU_OP2_MIN: 567 case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break; 568 case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break; 569 case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break; 570 case ALU_OP2_MUL: 571 case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break; 572 case ALU_OP2_MULHI_INT: 573 r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break; 574 case ALU_OP2_MULHI_UINT: 575 r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break; 576 case ALU_OP2_MULLO_INT: 577 r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; 578 case ALU_OP2_MULLO_UINT: 579 r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; 580 case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break; 581 case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break; 582 case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break; 583 584 default: 585 return false; 586 } 587 588 return true; 589 } 590 591 // fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5) fold_assoc(alu_node * n)592 bool expr_handler::fold_assoc(alu_node *n) { 593 594 alu_node *a = n; 595 literal cr; 596 597 int last_arg = -3; 598 599 unsigned op = n->bc.op; 600 bool allow_neg = false, cur_neg = false; 601 bool distribute_neg = false; 602 603 switch(op) { 604 case ALU_OP2_ADD: 605 distribute_neg = true; 606 allow_neg = true; 607 break; 608 case ALU_OP2_MUL: 609 case ALU_OP2_MUL_IEEE: 610 allow_neg = true; 611 break; 612 case ALU_OP3_MULADD: 613 allow_neg = true; 614 op = ALU_OP2_MUL; 615 break; 616 case ALU_OP3_MULADD_IEEE: 617 allow_neg = true; 618 op = ALU_OP2_MUL_IEEE; 619 break; 620 default: 621 if (n->bc.op_ptr->src_count != 2) 622 return false; 623 } 624 625 // check if we can evaluate the op 626 if (!eval_const_op(op, cr, literal(0), literal(0))) 627 return false; 628 629 while (true) { 630 631 value *v0 = a->src[0]->gvalue(); 632 value *v1 = a->src[1]->gvalue(); 633 634 last_arg = -2; 635 636 if (v1->is_const()) { 637 literal arg = v1->get_const_value(); 638 apply_alu_src_mod(a->bc, 1, arg); 639 if (cur_neg && distribute_neg) 640 arg.f = -arg.f; 641 642 if (a == n) 643 cr = arg; 644 else 645 eval_const_op(op, cr, cr, arg); 646 647 if (v0->def) { 648 alu_node *d0 = static_cast<alu_node*>(v0->def); 649 if ((d0->is_alu_op(op) || 650 (op == ALU_OP2_MUL_IEEE && 651 d0->is_alu_op(ALU_OP2_MUL))) && 652 !d0->bc.omod && !d0->bc.clamp && 653 !a->bc.src[0].abs && 654 (!a->bc.src[0].neg || allow_neg)) { 655 cur_neg ^= a->bc.src[0].neg; 656 a = d0; 657 continue; 658 } 659 } 660 last_arg = 0; 661 662 } 663 664 if (v0->is_const()) { 665 literal arg = v0->get_const_value(); 666 apply_alu_src_mod(a->bc, 0, arg); 667 if (cur_neg && distribute_neg) 668 arg.f = -arg.f; 669 670 if (last_arg == 0) { 671 eval_const_op(op, cr, cr, arg); 672 last_arg = -1; 673 break; 674 } 675 676 if (a == n) 677 cr = arg; 678 else 679 eval_const_op(op, cr, cr, arg); 680 681 if (v1->def) { 682 alu_node *d1 = static_cast<alu_node*>(v1->def); 683 if ((d1->is_alu_op(op) || 684 (op == ALU_OP2_MUL_IEEE && 685 d1->is_alu_op(ALU_OP2_MUL))) && 686 !d1->bc.omod && !d1->bc.clamp && 687 !a->bc.src[1].abs && 688 (!a->bc.src[1].neg || allow_neg)) { 689 cur_neg ^= a->bc.src[1].neg; 690 a = d1; 691 continue; 692 } 693 } 694 695 last_arg = 1; 696 } 697 698 break; 699 }; 700 701 if (last_arg == -1) { 702 // result is const 703 apply_alu_dst_mod(n->bc, cr); 704 705 if (n->bc.op == op) { 706 convert_to_mov(*n, sh.get_const_value(cr)); 707 fold_alu_op1(*n); 708 return true; 709 } else { // MULADD => ADD 710 n->src[0] = n->src[2]; 711 n->bc.src[0] = n->bc.src[2]; 712 n->src[1] = sh.get_const_value(cr); 713 memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); 714 715 n->src.resize(2); 716 n->bc.set_op(ALU_OP2_ADD); 717 } 718 } else if (last_arg >= 0) { 719 n->src[0] = a->src[last_arg]; 720 n->bc.src[0] = a->bc.src[last_arg]; 721 n->bc.src[0].neg ^= cur_neg; 722 n->src[1] = sh.get_const_value(cr); 723 memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); 724 } 725 726 return false; 727 } 728 fold_alu_op2(alu_node & n)729 bool expr_handler::fold_alu_op2(alu_node& n) { 730 731 if (n.src.size() < 2) 732 return false; 733 734 unsigned flags = n.bc.op_ptr->flags; 735 736 if (flags & AF_SET) { 737 return fold_setcc(n); 738 } 739 740 if (!sh.safe_math && (flags & AF_M_ASSOC)) { 741 if (fold_assoc(&n)) 742 return true; 743 } 744 745 value* v0 = n.src[0]->gvalue(); 746 value* v1 = n.src[1]->gvalue(); 747 748 assert(v0 && v1); 749 750 // handle some operations with equal args, e.g. x + x => x * 2 751 if (v0 == v1) { 752 if (n.bc.src[0].neg == n.bc.src[1].neg && 753 n.bc.src[0].abs == n.bc.src[1].abs) { 754 switch (n.bc.op) { 755 case ALU_OP2_MIN: // (MIN x, x) => (MOV x) 756 case ALU_OP2_MAX: 757 convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); 758 return fold_alu_op1(n); 759 case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) 760 if (!sh.safe_math) { 761 n.src[1] = sh.get_const_value(2.0f); 762 memset(&n.bc.src[1], 0, sizeof(bc_alu_src)); 763 n.bc.set_op(ALU_OP2_MUL); 764 return fold_alu_op2(n); 765 } 766 break; 767 } 768 } 769 if (n.bc.src[0].neg != n.bc.src[1].neg && 770 n.bc.src[0].abs == n.bc.src[1].abs) { 771 switch (n.bc.op) { 772 case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0) 773 if (!sh.safe_math) { 774 convert_to_mov(n, sh.get_const_value(literal(0))); 775 return fold_alu_op1(n); 776 } 777 break; 778 } 779 } 780 } 781 782 if (n.bc.op == ALU_OP2_ADD) { 783 if (fold_mul_add(&n)) 784 return true; 785 } 786 787 bool isc0 = v0->is_const(); 788 bool isc1 = v1->is_const(); 789 790 if (!isc0 && !isc1) 791 return false; 792 793 literal dv, cv0, cv1; 794 795 if (isc0) { 796 cv0 = v0->get_const_value(); 797 apply_alu_src_mod(n.bc, 0, cv0); 798 } 799 800 if (isc1) { 801 cv1 = v1->get_const_value(); 802 apply_alu_src_mod(n.bc, 1, cv1); 803 } 804 805 if (isc0 && isc1) { 806 807 if (!eval_const_op(n.bc.op, dv, cv0, cv1)) 808 return false; 809 810 } else { // one source is const 811 812 if (isc0 && cv0 == literal(0)) { 813 switch (n.bc.op) { 814 case ALU_OP2_ADD: 815 case ALU_OP2_ADD_INT: 816 case ALU_OP2_MAX_UINT: 817 case ALU_OP2_OR_INT: 818 case ALU_OP2_XOR_INT: 819 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); 820 return fold_alu_op1(n); 821 case ALU_OP2_AND_INT: 822 case ALU_OP2_ASHR_INT: 823 case ALU_OP2_LSHL_INT: 824 case ALU_OP2_LSHR_INT: 825 case ALU_OP2_MIN_UINT: 826 case ALU_OP2_MUL: 827 case ALU_OP2_MULHI_UINT: 828 case ALU_OP2_MULLO_UINT: 829 convert_to_mov(n, sh.get_const_value(literal(0))); 830 return fold_alu_op1(n); 831 } 832 } else if (isc1 && cv1 == literal(0)) { 833 switch (n.bc.op) { 834 case ALU_OP2_ADD: 835 case ALU_OP2_ADD_INT: 836 case ALU_OP2_ASHR_INT: 837 case ALU_OP2_LSHL_INT: 838 case ALU_OP2_LSHR_INT: 839 case ALU_OP2_MAX_UINT: 840 case ALU_OP2_OR_INT: 841 case ALU_OP2_SUB_INT: 842 case ALU_OP2_XOR_INT: 843 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); 844 return fold_alu_op1(n); 845 case ALU_OP2_AND_INT: 846 case ALU_OP2_MIN_UINT: 847 case ALU_OP2_MUL: 848 case ALU_OP2_MULHI_UINT: 849 case ALU_OP2_MULLO_UINT: 850 convert_to_mov(n, sh.get_const_value(literal(0))); 851 return fold_alu_op1(n); 852 } 853 } else if (isc0 && cv0 == literal(1.0f)) { 854 switch (n.bc.op) { 855 case ALU_OP2_MUL: 856 case ALU_OP2_MUL_IEEE: 857 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); 858 return fold_alu_op1(n); 859 } 860 } else if (isc1 && cv1 == literal(1.0f)) { 861 switch (n.bc.op) { 862 case ALU_OP2_MUL: 863 case ALU_OP2_MUL_IEEE: 864 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); 865 return fold_alu_op1(n); 866 } 867 } 868 869 return false; 870 } 871 872 apply_alu_dst_mod(n.bc, dv); 873 assign_source(n.dst[0], get_const(dv)); 874 return true; 875 } 876 evaluate_condition(unsigned alu_cnd_flags,literal s1,literal s2)877 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags, 878 literal s1, literal s2) { 879 880 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK; 881 unsigned cc = alu_cnd_flags & AF_CC_MASK; 882 883 switch (cmp_type) { 884 case AF_FLOAT_CMP: { 885 switch (cc) { 886 case AF_CC_E : return s1.f == s2.f; 887 case AF_CC_GT: return s1.f > s2.f; 888 case AF_CC_GE: return s1.f >= s2.f; 889 case AF_CC_NE: return s1.f != s2.f; 890 case AF_CC_LT: return s1.f < s2.f; 891 case AF_CC_LE: return s1.f <= s2.f; 892 default: 893 assert(!"invalid condition code"); 894 return false; 895 } 896 } 897 case AF_INT_CMP: { 898 switch (cc) { 899 case AF_CC_E : return s1.i == s2.i; 900 case AF_CC_GT: return s1.i > s2.i; 901 case AF_CC_GE: return s1.i >= s2.i; 902 case AF_CC_NE: return s1.i != s2.i; 903 case AF_CC_LT: return s1.i < s2.i; 904 case AF_CC_LE: return s1.i <= s2.i; 905 default: 906 assert(!"invalid condition code"); 907 return false; 908 } 909 } 910 case AF_UINT_CMP: { 911 switch (cc) { 912 case AF_CC_E : return s1.u == s2.u; 913 case AF_CC_GT: return s1.u > s2.u; 914 case AF_CC_GE: return s1.u >= s2.u; 915 case AF_CC_NE: return s1.u != s2.u; 916 case AF_CC_LT: return s1.u < s2.u; 917 case AF_CC_LE: return s1.u <= s2.u; 918 default: 919 assert(!"invalid condition code"); 920 return false; 921 } 922 } 923 default: 924 assert(!"invalid cmp_type"); 925 return false; 926 } 927 } 928 fold_alu_op3(alu_node & n)929 bool expr_handler::fold_alu_op3(alu_node& n) { 930 931 if (n.src.size() < 3) 932 return false; 933 934 if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) { 935 if (fold_assoc(&n)) 936 return true; 937 } 938 939 value* v0 = n.src[0]->gvalue(); 940 value* v1 = n.src[1]->gvalue(); 941 value* v2 = n.src[2]->gvalue(); 942 943 assert(v0 && v1 && v2 && n.dst[0]); 944 945 bool isc0 = v0->is_const(); 946 bool isc1 = v1->is_const(); 947 bool isc2 = v2->is_const(); 948 949 literal dv, cv0, cv1, cv2; 950 951 if (isc0) { 952 cv0 = v0->get_const_value(); 953 apply_alu_src_mod(n.bc, 0, cv0); 954 } 955 956 if (isc1) { 957 cv1 = v1->get_const_value(); 958 apply_alu_src_mod(n.bc, 1, cv1); 959 } 960 961 if (isc2) { 962 cv2 = v2->get_const_value(); 963 apply_alu_src_mod(n.bc, 2, cv2); 964 } 965 966 unsigned flags = n.bc.op_ptr->flags; 967 968 if (flags & AF_CMOV) { 969 int src = 0; 970 971 if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) { 972 // result doesn't depend on condition, convert to MOV 973 src = 1; 974 } else if (isc0) { 975 // src0 is const, condition can be evaluated, convert to MOV 976 bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK | 977 AF_CMP_TYPE_MASK), cv0, literal(0)); 978 src = cond ? 1 : 2; 979 } 980 981 if (src) { 982 // if src is selected, convert to MOV 983 convert_to_mov(n, n.src[src], n.bc.src[src].neg); 984 return fold_alu_op1(n); 985 } 986 } 987 988 // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b)) 989 if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD || 990 n.bc.op == ALU_OP3_MULADD_IEEE)) { 991 992 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 993 ALU_OP2_MUL_IEEE : ALU_OP2_MUL; 994 995 if (!isc2 && v2->def && v2->def->is_alu_op(op)) { 996 997 alu_node *md = static_cast<alu_node*>(v2->def); 998 value *mv0 = md->src[0]->gvalue(); 999 value *mv1 = md->src[1]->gvalue(); 1000 1001 int es0 = -1, es1; 1002 1003 if (v0 == mv0) { 1004 es0 = 0; 1005 es1 = 0; 1006 } else if (v0 == mv1) { 1007 es0 = 0; 1008 es1 = 1; 1009 } else if (v1 == mv0) { 1010 es0 = 1; 1011 es1 = 0; 1012 } else if (v1 == mv1) { 1013 es0 = 1; 1014 es1 = 1; 1015 } 1016 1017 if (es0 != -1) { 1018 value *va0 = es0 == 0 ? v1 : v0; 1019 value *va1 = es1 == 0 ? mv1 : mv0; 1020 1021 alu_node *add = sh.create_alu(); 1022 add->bc.set_op(ALU_OP2_ADD); 1023 1024 add->dst.resize(1); 1025 add->src.resize(2); 1026 1027 value *t = sh.create_temp_value(); 1028 t->def = add; 1029 add->dst[0] = t; 1030 add->src[0] = va0; 1031 add->src[1] = va1; 1032 add->bc.src[0] = n.bc.src[!es0]; 1033 add->bc.src[1] = md->bc.src[!es1]; 1034 1035 add->bc.src[1].neg ^= n.bc.src[2].neg ^ 1036 (n.bc.src[es0].neg != md->bc.src[es1].neg); 1037 1038 n.insert_before(add); 1039 vt.add_value(t); 1040 1041 t = t->gvalue(); 1042 1043 if (es0 == 1) { 1044 n.src[0] = n.src[1]; 1045 n.bc.src[0] = n.bc.src[1]; 1046 } 1047 1048 n.src[1] = t; 1049 memset(&n.bc.src[1], 0, sizeof(bc_alu_src)); 1050 1051 n.src.resize(2); 1052 1053 n.bc.set_op(op); 1054 return fold_alu_op2(n); 1055 } 1056 } 1057 } 1058 1059 if (!isc0 && !isc1 && !isc2) 1060 return false; 1061 1062 if (isc0 && isc1 && isc2) { 1063 switch (n.bc.op) { 1064 case ALU_OP3_MULADD_IEEE: 1065 case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break; 1066 1067 // TODO 1068 1069 default: 1070 return false; 1071 } 1072 } else { 1073 if (isc0 && isc1) { 1074 switch (n.bc.op) { 1075 case ALU_OP3_MULADD: 1076 case ALU_OP3_MULADD_IEEE: 1077 dv = cv0.f * cv1.f; 1078 n.bc.set_op(ALU_OP2_ADD); 1079 n.src[0] = sh.get_const_value(dv); 1080 memset(&n.bc.src[0], 0, sizeof(bc_alu_src)); 1081 n.src[1] = n.src[2]; 1082 n.bc.src[1] = n.bc.src[2]; 1083 n.src.resize(2); 1084 return fold_alu_op2(n); 1085 } 1086 } 1087 1088 if (n.bc.op == ALU_OP3_MULADD) { 1089 if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) { 1090 convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs); 1091 return fold_alu_op1(n); 1092 } 1093 } 1094 1095 if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) { 1096 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 1097 ALU_OP2_MUL_IEEE : ALU_OP2_MUL; 1098 1099 if (isc1 && v0 == v2) { 1100 cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f); 1101 n.src[1] = sh.get_const_value(cv1); 1102 n.bc.src[1].neg = 0; 1103 n.bc.src[1].abs = 0; 1104 n.bc.set_op(op); 1105 n.src.resize(2); 1106 return fold_alu_op2(n); 1107 } else if (isc0 && v1 == v2) { 1108 cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f); 1109 n.src[0] = sh.get_const_value(cv0); 1110 n.bc.src[0].neg = 0; 1111 n.bc.src[0].abs = 0; 1112 n.bc.set_op(op); 1113 n.src.resize(2); 1114 return fold_alu_op2(n); 1115 } 1116 } 1117 1118 return false; 1119 } 1120 1121 apply_alu_dst_mod(n.bc, dv); 1122 assign_source(n.dst[0], get_const(dv)); 1123 return true; 1124 } 1125 invert_setcc_condition(unsigned cc,bool & swap_args)1126 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) { 1127 unsigned ncc = 0; 1128 1129 switch (cc) { 1130 case AF_CC_E: ncc = AF_CC_NE; break; 1131 case AF_CC_NE: ncc = AF_CC_E; break; 1132 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break; 1133 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break; 1134 default: 1135 assert(!"unexpected condition code"); 1136 break; 1137 } 1138 return ncc; 1139 } 1140 get_setcc_op(unsigned cc,unsigned cmp_type,bool int_dst)1141 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) { 1142 1143 if (int_dst && cmp_type == AF_FLOAT_CMP) { 1144 switch (cc) { 1145 case AF_CC_E: return ALU_OP2_SETE_DX10; 1146 case AF_CC_NE: return ALU_OP2_SETNE_DX10; 1147 case AF_CC_GT: return ALU_OP2_SETGT_DX10; 1148 case AF_CC_GE: return ALU_OP2_SETGE_DX10; 1149 } 1150 } else { 1151 1152 switch(cmp_type) { 1153 case AF_FLOAT_CMP: { 1154 switch (cc) { 1155 case AF_CC_E: return ALU_OP2_SETE; 1156 case AF_CC_NE: return ALU_OP2_SETNE; 1157 case AF_CC_GT: return ALU_OP2_SETGT; 1158 case AF_CC_GE: return ALU_OP2_SETGE; 1159 } 1160 break; 1161 } 1162 case AF_INT_CMP: { 1163 switch (cc) { 1164 case AF_CC_E: return ALU_OP2_SETE_INT; 1165 case AF_CC_NE: return ALU_OP2_SETNE_INT; 1166 case AF_CC_GT: return ALU_OP2_SETGT_INT; 1167 case AF_CC_GE: return ALU_OP2_SETGE_INT; 1168 } 1169 break; 1170 } 1171 case AF_UINT_CMP: { 1172 switch (cc) { 1173 case AF_CC_E: return ALU_OP2_SETE_INT; 1174 case AF_CC_NE: return ALU_OP2_SETNE_INT; 1175 case AF_CC_GT: return ALU_OP2_SETGT_UINT; 1176 case AF_CC_GE: return ALU_OP2_SETGE_UINT; 1177 } 1178 break; 1179 } 1180 } 1181 } 1182 1183 assert(!"unexpected cc&cmp_type combination"); 1184 return ~0u; 1185 } 1186 get_predsetcc_op(unsigned cc,unsigned cmp_type)1187 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) { 1188 1189 switch(cmp_type) { 1190 case AF_FLOAT_CMP: { 1191 switch (cc) { 1192 case AF_CC_E: return ALU_OP2_PRED_SETE; 1193 case AF_CC_NE: return ALU_OP2_PRED_SETNE; 1194 case AF_CC_GT: return ALU_OP2_PRED_SETGT; 1195 case AF_CC_GE: return ALU_OP2_PRED_SETGE; 1196 } 1197 break; 1198 } 1199 case AF_INT_CMP: { 1200 switch (cc) { 1201 case AF_CC_E: return ALU_OP2_PRED_SETE_INT; 1202 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; 1203 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT; 1204 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT; 1205 } 1206 break; 1207 } 1208 case AF_UINT_CMP: { 1209 switch (cc) { 1210 case AF_CC_E: return ALU_OP2_PRED_SETE_INT; 1211 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; 1212 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT; 1213 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT; 1214 } 1215 break; 1216 } 1217 } 1218 1219 assert(!"unexpected cc&cmp_type combination"); 1220 return ~0u; 1221 } 1222 get_killcc_op(unsigned cc,unsigned cmp_type)1223 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) { 1224 1225 switch(cmp_type) { 1226 case AF_FLOAT_CMP: { 1227 switch (cc) { 1228 case AF_CC_E: return ALU_OP2_KILLE; 1229 case AF_CC_NE: return ALU_OP2_KILLNE; 1230 case AF_CC_GT: return ALU_OP2_KILLGT; 1231 case AF_CC_GE: return ALU_OP2_KILLGE; 1232 } 1233 break; 1234 } 1235 case AF_INT_CMP: { 1236 switch (cc) { 1237 case AF_CC_E: return ALU_OP2_KILLE_INT; 1238 case AF_CC_NE: return ALU_OP2_KILLNE_INT; 1239 case AF_CC_GT: return ALU_OP2_KILLGT_INT; 1240 case AF_CC_GE: return ALU_OP2_KILLGE_INT; 1241 } 1242 break; 1243 } 1244 case AF_UINT_CMP: { 1245 switch (cc) { 1246 case AF_CC_E: return ALU_OP2_KILLE_INT; 1247 case AF_CC_NE: return ALU_OP2_KILLNE_INT; 1248 case AF_CC_GT: return ALU_OP2_KILLGT_UINT; 1249 case AF_CC_GE: return ALU_OP2_KILLGE_UINT; 1250 } 1251 break; 1252 } 1253 } 1254 1255 assert(!"unexpected cc&cmp_type combination"); 1256 return ~0u; 1257 } 1258 get_cndcc_op(unsigned cc,unsigned cmp_type)1259 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) { 1260 1261 switch(cmp_type) { 1262 case AF_FLOAT_CMP: { 1263 switch (cc) { 1264 case AF_CC_E: return ALU_OP3_CNDE; 1265 case AF_CC_GT: return ALU_OP3_CNDGT; 1266 case AF_CC_GE: return ALU_OP3_CNDGE; 1267 } 1268 break; 1269 } 1270 case AF_INT_CMP: { 1271 switch (cc) { 1272 case AF_CC_E: return ALU_OP3_CNDE_INT; 1273 case AF_CC_GT: return ALU_OP3_CNDGT_INT; 1274 case AF_CC_GE: return ALU_OP3_CNDGE_INT; 1275 } 1276 break; 1277 } 1278 } 1279 1280 assert(!"unexpected cc&cmp_type combination"); 1281 return ~0u; 1282 } 1283 1284 convert_predset_to_set(shader & sh,alu_node * a)1285 void convert_predset_to_set(shader& sh, alu_node* a) { 1286 1287 unsigned flags = a->bc.op_ptr->flags; 1288 unsigned cc = flags & AF_CC_MASK; 1289 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 1290 1291 bool swap_args = false; 1292 1293 cc = invert_setcc_condition(cc, swap_args); 1294 1295 unsigned newop = get_setcc_op(cc, cmp_type, true); 1296 1297 a->dst.resize(1); 1298 a->bc.set_op(newop); 1299 1300 if (swap_args) { 1301 std::swap(a->src[0], a->src[1]); 1302 std::swap(a->bc.src[0], a->bc.src[1]); 1303 } 1304 1305 a->bc.update_exec_mask = 0; 1306 a->bc.update_pred = 0; 1307 } 1308 1309 } // namespace r600_sb 1310