1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Shader.hpp" 16 17 #include "VertexShader.hpp" 18 #include "PixelShader.hpp" 19 #include "Math.hpp" 20 #include "Debug.hpp" 21 22 #include <set> 23 #include <fstream> 24 #include <sstream> 25 #include <stdarg.h> 26 27 namespace sw 28 { 29 volatile int Shader::serialCounter = 1; 30 OPCODE_DP(int i)31 Shader::Opcode Shader::OPCODE_DP(int i) 32 { 33 switch(i) 34 { 35 default: ASSERT(false); 36 case 1: return OPCODE_DP1; 37 case 2: return OPCODE_DP2; 38 case 3: return OPCODE_DP3; 39 case 4: return OPCODE_DP4; 40 } 41 } 42 OPCODE_LEN(int i)43 Shader::Opcode Shader::OPCODE_LEN(int i) 44 { 45 switch(i) 46 { 47 default: ASSERT(false); 48 case 1: return OPCODE_ABS; 49 case 2: return OPCODE_LEN2; 50 case 3: return OPCODE_LEN3; 51 case 4: return OPCODE_LEN4; 52 } 53 } 54 OPCODE_DIST(int i)55 Shader::Opcode Shader::OPCODE_DIST(int i) 56 { 57 switch(i) 58 { 59 default: ASSERT(false); 60 case 1: return OPCODE_DIST1; 61 case 2: return OPCODE_DIST2; 62 case 3: return OPCODE_DIST3; 63 case 4: return OPCODE_DIST4; 64 } 65 } 66 OPCODE_NRM(int i)67 Shader::Opcode Shader::OPCODE_NRM(int i) 68 { 69 switch(i) 70 { 71 default: ASSERT(false); 72 case 1: return OPCODE_SGN; 73 case 2: return OPCODE_NRM2; 74 case 3: return OPCODE_NRM3; 75 case 4: return OPCODE_NRM4; 76 } 77 } 78 OPCODE_FORWARD(int i)79 Shader::Opcode Shader::OPCODE_FORWARD(int i) 80 { 81 switch(i) 82 { 83 default: ASSERT(false); 84 case 1: return OPCODE_FORWARD1; 85 case 2: return OPCODE_FORWARD2; 86 case 3: return OPCODE_FORWARD3; 87 case 4: return OPCODE_FORWARD4; 88 } 89 } 90 OPCODE_REFLECT(int i)91 Shader::Opcode Shader::OPCODE_REFLECT(int i) 92 { 93 switch(i) 94 { 95 default: ASSERT(false); 96 case 1: return OPCODE_REFLECT1; 97 case 2: return OPCODE_REFLECT2; 98 case 3: return OPCODE_REFLECT3; 99 case 4: return OPCODE_REFLECT4; 100 } 101 } 102 OPCODE_REFRACT(int i)103 Shader::Opcode Shader::OPCODE_REFRACT(int i) 104 { 105 switch(i) 106 { 107 default: ASSERT(false); 108 case 1: return OPCODE_REFRACT1; 109 case 2: return OPCODE_REFRACT2; 110 case 3: return OPCODE_REFRACT3; 111 case 4: return OPCODE_REFRACT4; 112 } 113 } 114 Instruction(Opcode opcode)115 Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0) 116 { 117 control = CONTROL_RESERVED0; 118 119 predicate = false; 120 predicateNot = false; 121 predicateSwizzle = 0xE4; 122 123 coissue = false; 124 samplerType = SAMPLER_UNKNOWN; 125 usage = USAGE_POSITION; 126 usageIndex = 0; 127 } 128 Instruction(const unsigned long * token,int size,unsigned char majorVersion)129 Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0) 130 { 131 parseOperationToken(*token++, majorVersion); 132 133 samplerType = SAMPLER_UNKNOWN; 134 usage = USAGE_POSITION; 135 usageIndex = 0; 136 137 if(opcode == OPCODE_IF || 138 opcode == OPCODE_IFC || 139 opcode == OPCODE_LOOP || 140 opcode == OPCODE_REP || 141 opcode == OPCODE_BREAKC || 142 opcode == OPCODE_BREAKP) // No destination operand 143 { 144 if(size > 0) parseSourceToken(0, token++, majorVersion); 145 if(size > 1) parseSourceToken(1, token++, majorVersion); 146 if(size > 2) parseSourceToken(2, token++, majorVersion); 147 if(size > 3) ASSERT(false); 148 } 149 else if(opcode == OPCODE_DCL) 150 { 151 parseDeclarationToken(*token++); 152 parseDestinationToken(token++, majorVersion); 153 } 154 else 155 { 156 if(size > 0) 157 { 158 parseDestinationToken(token, majorVersion); 159 160 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3) 161 { 162 token++; 163 size--; 164 } 165 166 token++; 167 size--; 168 } 169 170 if(predicate) 171 { 172 ASSERT(size != 0); 173 174 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT; 175 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); 176 177 token++; 178 size--; 179 } 180 181 for(int i = 0; size > 0; i++) 182 { 183 parseSourceToken(i, token, majorVersion); 184 185 token++; 186 size--; 187 188 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2) 189 { 190 token++; 191 size--; 192 } 193 } 194 } 195 } 196 ~Instruction()197 Shader::Instruction::~Instruction() 198 { 199 } 200 string(ShaderType shaderType,unsigned short version) const201 std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const 202 { 203 std::string instructionString; 204 205 if(opcode != OPCODE_DCL) 206 { 207 instructionString += coissue ? "+ " : ""; 208 209 if(predicate) 210 { 211 instructionString += predicateNot ? "(!p0" : "(p0"; 212 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle); 213 instructionString += ") "; 214 } 215 216 instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString(); 217 218 if(dst.type != PARAMETER_VOID) 219 { 220 instructionString += " " + dst.string(shaderType, version) + 221 dst.relativeString() + 222 dst.maskString(); 223 } 224 225 for(int i = 0; i < 4; i++) 226 { 227 if(src[i].type != PARAMETER_VOID) 228 { 229 instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " "; 230 instructionString += src[i].preModifierString() + 231 src[i].string(shaderType, version) + 232 src[i].relativeString() + 233 src[i].postModifierString() + 234 src[i].swizzleString(); 235 } 236 } 237 } 238 else // DCL 239 { 240 instructionString += "dcl"; 241 242 if(dst.type == PARAMETER_SAMPLER) 243 { 244 switch(samplerType) 245 { 246 case SAMPLER_UNKNOWN: instructionString += " "; break; 247 case SAMPLER_1D: instructionString += "_1d "; break; 248 case SAMPLER_2D: instructionString += "_2d "; break; 249 case SAMPLER_CUBE: instructionString += "_cube "; break; 250 case SAMPLER_VOLUME: instructionString += "_volume "; break; 251 default: 252 ASSERT(false); 253 } 254 255 instructionString += dst.string(shaderType, version); 256 } 257 else if(dst.type == PARAMETER_INPUT || 258 dst.type == PARAMETER_OUTPUT || 259 dst.type == PARAMETER_TEXTURE) 260 { 261 if(version >= 0x0300) 262 { 263 switch(usage) 264 { 265 case USAGE_POSITION: instructionString += "_position"; break; 266 case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break; 267 case USAGE_BLENDINDICES: instructionString += "_blendindices"; break; 268 case USAGE_NORMAL: instructionString += "_normal"; break; 269 case USAGE_PSIZE: instructionString += "_psize"; break; 270 case USAGE_TEXCOORD: instructionString += "_texcoord"; break; 271 case USAGE_TANGENT: instructionString += "_tangent"; break; 272 case USAGE_BINORMAL: instructionString += "_binormal"; break; 273 case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break; 274 case USAGE_POSITIONT: instructionString += "_positiont"; break; 275 case USAGE_COLOR: instructionString += "_color"; break; 276 case USAGE_FOG: instructionString += "_fog"; break; 277 case USAGE_DEPTH: instructionString += "_depth"; break; 278 case USAGE_SAMPLE: instructionString += "_sample"; break; 279 default: 280 ASSERT(false); 281 } 282 283 if(usageIndex > 0) 284 { 285 std::ostringstream buffer; 286 287 buffer << (int)usageIndex; 288 289 instructionString += buffer.str(); 290 } 291 } 292 else ASSERT(dst.type != PARAMETER_OUTPUT); 293 294 instructionString += " "; 295 296 instructionString += dst.string(shaderType, version); 297 instructionString += dst.maskString(); 298 } 299 else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace 300 { 301 instructionString += " "; 302 303 instructionString += dst.string(shaderType, version); 304 } 305 else ASSERT(false); 306 } 307 308 return instructionString; 309 } 310 modifierString() const311 std::string Shader::DestinationParameter::modifierString() const 312 { 313 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 314 { 315 return ""; 316 } 317 318 std::string modifierString; 319 320 if(integer) 321 { 322 modifierString += "_int"; 323 } 324 325 if(saturate) 326 { 327 modifierString += "_sat"; 328 } 329 330 if(partialPrecision) 331 { 332 modifierString += "_pp"; 333 } 334 335 if(centroid) 336 { 337 modifierString += "_centroid"; 338 } 339 340 return modifierString; 341 } 342 shiftString() const343 std::string Shader::DestinationParameter::shiftString() const 344 { 345 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 346 { 347 return ""; 348 } 349 350 switch(shift) 351 { 352 case 0: return ""; 353 case 1: return "_x2"; 354 case 2: return "_x4"; 355 case 3: return "_x8"; 356 case -1: return "_d2"; 357 case -2: return "_d4"; 358 case -3: return "_d8"; 359 default: 360 return ""; 361 // ASSERT(false); // FIXME 362 } 363 } 364 maskString() const365 std::string Shader::DestinationParameter::maskString() const 366 { 367 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 368 { 369 return ""; 370 } 371 372 switch(mask) 373 { 374 case 0x0: return ""; 375 case 0x1: return ".x"; 376 case 0x2: return ".y"; 377 case 0x3: return ".xy"; 378 case 0x4: return ".z"; 379 case 0x5: return ".xz"; 380 case 0x6: return ".yz"; 381 case 0x7: return ".xyz"; 382 case 0x8: return ".w"; 383 case 0x9: return ".xw"; 384 case 0xA: return ".yw"; 385 case 0xB: return ".xyw"; 386 case 0xC: return ".zw"; 387 case 0xD: return ".xzw"; 388 case 0xE: return ".yzw"; 389 case 0xF: return ""; 390 default: 391 ASSERT(false); 392 } 393 394 return ""; 395 } 396 preModifierString() const397 std::string Shader::SourceParameter::preModifierString() const 398 { 399 if(type == PARAMETER_VOID) 400 { 401 return ""; 402 } 403 404 switch(modifier) 405 { 406 case MODIFIER_NONE: return ""; 407 case MODIFIER_NEGATE: return "-"; 408 case MODIFIER_BIAS: return ""; 409 case MODIFIER_BIAS_NEGATE: return "-"; 410 case MODIFIER_SIGN: return ""; 411 case MODIFIER_SIGN_NEGATE: return "-"; 412 case MODIFIER_COMPLEMENT: return "1-"; 413 case MODIFIER_X2: return ""; 414 case MODIFIER_X2_NEGATE: return "-"; 415 case MODIFIER_DZ: return ""; 416 case MODIFIER_DW: return ""; 417 case MODIFIER_ABS: return ""; 418 case MODIFIER_ABS_NEGATE: return "-"; 419 case MODIFIER_NOT: return "!"; 420 default: 421 ASSERT(false); 422 } 423 424 return ""; 425 } 426 relativeString() const427 std::string Shader::Parameter::relativeString() const 428 { 429 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP) 430 { 431 if(rel.type == PARAMETER_VOID) 432 { 433 return ""; 434 } 435 else if(rel.type == PARAMETER_ADDR) 436 { 437 switch(rel.swizzle & 0x03) 438 { 439 case 0: return "[a0.x]"; 440 case 1: return "[a0.y]"; 441 case 2: return "[a0.z]"; 442 case 3: return "[a0.w]"; 443 } 444 } 445 else if(rel.type == PARAMETER_TEMP) 446 { 447 std::ostringstream buffer; 448 buffer << rel.index; 449 450 switch(rel.swizzle & 0x03) 451 { 452 case 0: return "[r" + buffer.str() + ".x]"; 453 case 1: return "[r" + buffer.str() + ".y]"; 454 case 2: return "[r" + buffer.str() + ".z]"; 455 case 3: return "[r" + buffer.str() + ".w]"; 456 } 457 } 458 else if(rel.type == PARAMETER_LOOP) 459 { 460 return "[aL]"; 461 } 462 else if(rel.type == PARAMETER_CONST) 463 { 464 std::ostringstream buffer; 465 buffer << rel.index; 466 467 switch(rel.swizzle & 0x03) 468 { 469 case 0: return "[c" + buffer.str() + ".x]"; 470 case 1: return "[c" + buffer.str() + ".y]"; 471 case 2: return "[c" + buffer.str() + ".z]"; 472 case 3: return "[c" + buffer.str() + ".w]"; 473 } 474 } 475 else ASSERT(false); 476 } 477 478 return ""; 479 } 480 postModifierString() const481 std::string Shader::SourceParameter::postModifierString() const 482 { 483 if(type == PARAMETER_VOID) 484 { 485 return ""; 486 } 487 488 switch(modifier) 489 { 490 case MODIFIER_NONE: return ""; 491 case MODIFIER_NEGATE: return ""; 492 case MODIFIER_BIAS: return "_bias"; 493 case MODIFIER_BIAS_NEGATE: return "_bias"; 494 case MODIFIER_SIGN: return "_bx2"; 495 case MODIFIER_SIGN_NEGATE: return "_bx2"; 496 case MODIFIER_COMPLEMENT: return ""; 497 case MODIFIER_X2: return "_x2"; 498 case MODIFIER_X2_NEGATE: return "_x2"; 499 case MODIFIER_DZ: return "_dz"; 500 case MODIFIER_DW: return "_dw"; 501 case MODIFIER_ABS: return "_abs"; 502 case MODIFIER_ABS_NEGATE: return "_abs"; 503 case MODIFIER_NOT: return ""; 504 default: 505 ASSERT(false); 506 } 507 508 return ""; 509 } 510 swizzleString() const511 std::string Shader::SourceParameter::swizzleString() const 512 { 513 return Instruction::swizzleString(type, swizzle); 514 } 515 parseOperationToken(unsigned long token,unsigned char majorVersion)516 void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion) 517 { 518 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token 519 { 520 opcode = (Opcode)token; 521 522 control = CONTROL_RESERVED0; 523 predicate = false; 524 coissue = false; 525 } 526 else 527 { 528 opcode = (Opcode)(token & 0x0000FFFF); 529 control = (Control)((token & 0x00FF0000) >> 16); 530 531 int size = (token & 0x0F000000) >> 24; 532 533 predicate = (token & 0x10000000) != 0x00000000; 534 coissue = (token & 0x40000000) != 0x00000000; 535 536 if(majorVersion < 2) 537 { 538 if(size != 0) 539 { 540 ASSERT(false); // Reserved 541 } 542 } 543 544 if(majorVersion < 2) 545 { 546 if(predicate) 547 { 548 ASSERT(false); 549 } 550 } 551 552 if((token & 0x20000000) != 0x00000000) 553 { 554 ASSERT(false); // Reserved 555 } 556 557 if(majorVersion >= 2) 558 { 559 if(coissue) 560 { 561 ASSERT(false); // Reserved 562 } 563 } 564 565 if((token & 0x80000000) != 0x00000000) 566 { 567 ASSERT(false); 568 } 569 } 570 } 571 parseDeclarationToken(unsigned long token)572 void Shader::Instruction::parseDeclarationToken(unsigned long token) 573 { 574 samplerType = (SamplerType)((token & 0x78000000) >> 27); 575 usage = (Usage)(token & 0x0000001F); 576 usageIndex = (unsigned char)((token & 0x000F0000) >> 16); 577 } 578 parseDestinationToken(const unsigned long * token,unsigned char majorVersion)579 void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion) 580 { 581 dst.index = (unsigned short)(token[0] & 0x000007FF); 582 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 583 584 // TODO: Check type and index range 585 586 bool relative = (token[0] & 0x00002000) != 0x00000000; 587 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 588 dst.rel.swizzle = 0x00; 589 dst.rel.scale = 1; 590 591 if(relative && majorVersion >= 3) 592 { 593 dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 594 dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 595 } 596 else if(relative) ASSERT(false); // Reserved 597 598 if((token[0] & 0x0000C000) != 0x00000000) 599 { 600 ASSERT(false); // Reserved 601 } 602 603 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); 604 dst.saturate = (token[0] & 0x00100000) != 0; 605 dst.partialPrecision = (token[0] & 0x00200000) != 0; 606 dst.centroid = (token[0] & 0x00400000) != 0; 607 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; 608 609 if(majorVersion >= 2) 610 { 611 if(dst.shift) 612 { 613 ASSERT(false); // Reserved 614 } 615 } 616 617 if((token[0] & 0x80000000) != 0x80000000) 618 { 619 ASSERT(false); 620 } 621 } 622 parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)623 void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion) 624 { 625 // Defaults 626 src[i].index = 0; 627 src[i].type = PARAMETER_VOID; 628 src[i].modifier = MODIFIER_NONE; 629 src[i].swizzle = 0xE4; 630 src[i].rel.type = PARAMETER_VOID; 631 src[i].rel.swizzle = 0x00; 632 src[i].rel.scale = 1; 633 634 switch(opcode) 635 { 636 case OPCODE_DEF: 637 src[0].type = PARAMETER_FLOAT4LITERAL; 638 src[0].value[i] = *(float*)token; 639 break; 640 case OPCODE_DEFB: 641 src[0].type = PARAMETER_BOOL1LITERAL; 642 src[0].boolean[0] = *(int*)token; 643 break; 644 case OPCODE_DEFI: 645 src[0].type = PARAMETER_INT4LITERAL; 646 src[0].integer[i] = *(int*)token; 647 break; 648 default: 649 src[i].index = (unsigned short)(token[0] & 0x000007FF); 650 src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 651 652 // FIXME: Check type and index range 653 654 bool relative = (token[0] & 0x00002000) != 0x00000000; 655 src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 656 657 if((token[0] & 0x0000C000) != 0x00000000) 658 { 659 if(opcode != OPCODE_DEF && 660 opcode != OPCODE_DEFI && 661 opcode != OPCODE_DEFB) 662 { 663 ASSERT(false); 664 } 665 } 666 667 src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); 668 src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24); 669 670 if((token[0] & 0x80000000) != 0x80000000) 671 { 672 if(opcode != OPCODE_DEF && 673 opcode != OPCODE_DEFI && 674 opcode != OPCODE_DEFB) 675 { 676 ASSERT(false); 677 } 678 } 679 680 if(relative && majorVersion >= 2) 681 { 682 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 683 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 684 } 685 } 686 } 687 swizzleString(ParameterType type,unsigned char swizzle)688 std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle) 689 { 690 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4) 691 { 692 return ""; 693 } 694 695 int x = (swizzle & 0x03) >> 0; 696 int y = (swizzle & 0x0C) >> 2; 697 int z = (swizzle & 0x30) >> 4; 698 int w = (swizzle & 0xC0) >> 6; 699 700 std::string swizzleString = "."; 701 702 switch(x) 703 { 704 case 0: swizzleString += "x"; break; 705 case 1: swizzleString += "y"; break; 706 case 2: swizzleString += "z"; break; 707 case 3: swizzleString += "w"; break; 708 } 709 710 if(!(x == y && y == z && z == w)) 711 { 712 switch(y) 713 { 714 case 0: swizzleString += "x"; break; 715 case 1: swizzleString += "y"; break; 716 case 2: swizzleString += "z"; break; 717 case 3: swizzleString += "w"; break; 718 } 719 720 if(!(y == z && z == w)) 721 { 722 switch(z) 723 { 724 case 0: swizzleString += "x"; break; 725 case 1: swizzleString += "y"; break; 726 case 2: swizzleString += "z"; break; 727 case 3: swizzleString += "w"; break; 728 } 729 730 if(!(z == w)) 731 { 732 switch(w) 733 { 734 case 0: swizzleString += "x"; break; 735 case 1: swizzleString += "y"; break; 736 case 2: swizzleString += "z"; break; 737 case 3: swizzleString += "w"; break; 738 } 739 } 740 } 741 } 742 743 return swizzleString; 744 } 745 operationString(unsigned short version) const746 std::string Shader::Instruction::operationString(unsigned short version) const 747 { 748 switch(opcode) 749 { 750 case OPCODE_NULL: return "null"; 751 case OPCODE_NOP: return "nop"; 752 case OPCODE_MOV: return "mov"; 753 case OPCODE_ADD: return "add"; 754 case OPCODE_IADD: return "iadd"; 755 case OPCODE_SUB: return "sub"; 756 case OPCODE_ISUB: return "isub"; 757 case OPCODE_MAD: return "mad"; 758 case OPCODE_IMAD: return "imad"; 759 case OPCODE_MUL: return "mul"; 760 case OPCODE_IMUL: return "imul"; 761 case OPCODE_RCPX: return "rcpx"; 762 case OPCODE_DIV: return "div"; 763 case OPCODE_IDIV: return "idiv"; 764 case OPCODE_UDIV: return "udiv"; 765 case OPCODE_MOD: return "mod"; 766 case OPCODE_IMOD: return "imod"; 767 case OPCODE_UMOD: return "umod"; 768 case OPCODE_SHL: return "shl"; 769 case OPCODE_ISHR: return "ishr"; 770 case OPCODE_USHR: return "ushr"; 771 case OPCODE_RSQX: return "rsqx"; 772 case OPCODE_SQRT: return "sqrt"; 773 case OPCODE_RSQ: return "rsq"; 774 case OPCODE_LEN2: return "len2"; 775 case OPCODE_LEN3: return "len3"; 776 case OPCODE_LEN4: return "len4"; 777 case OPCODE_DIST1: return "dist1"; 778 case OPCODE_DIST2: return "dist2"; 779 case OPCODE_DIST3: return "dist3"; 780 case OPCODE_DIST4: return "dist4"; 781 case OPCODE_DP3: return "dp3"; 782 case OPCODE_DP4: return "dp4"; 783 case OPCODE_DET2: return "det2"; 784 case OPCODE_DET3: return "det3"; 785 case OPCODE_DET4: return "det4"; 786 case OPCODE_MIN: return "min"; 787 case OPCODE_IMIN: return "imin"; 788 case OPCODE_UMIN: return "umin"; 789 case OPCODE_MAX: return "max"; 790 case OPCODE_IMAX: return "imax"; 791 case OPCODE_UMAX: return "umax"; 792 case OPCODE_SLT: return "slt"; 793 case OPCODE_SGE: return "sge"; 794 case OPCODE_EXP2X: return "exp2x"; 795 case OPCODE_LOG2X: return "log2x"; 796 case OPCODE_LIT: return "lit"; 797 case OPCODE_ATT: return "att"; 798 case OPCODE_LRP: return "lrp"; 799 case OPCODE_STEP: return "step"; 800 case OPCODE_SMOOTH: return "smooth"; 801 case OPCODE_FLOATBITSTOINT: return "floatBitsToInt"; 802 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt"; 803 case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat"; 804 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat"; 805 case OPCODE_PACKSNORM2x16: return "packSnorm2x16"; 806 case OPCODE_PACKUNORM2x16: return "packUnorm2x16"; 807 case OPCODE_PACKHALF2x16: return "packHalf2x16"; 808 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16"; 809 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16"; 810 case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16"; 811 case OPCODE_FRC: return "frc"; 812 case OPCODE_M4X4: return "m4x4"; 813 case OPCODE_M4X3: return "m4x3"; 814 case OPCODE_M3X4: return "m3x4"; 815 case OPCODE_M3X3: return "m3x3"; 816 case OPCODE_M3X2: return "m3x2"; 817 case OPCODE_CALL: return "call"; 818 case OPCODE_CALLNZ: return "callnz"; 819 case OPCODE_LOOP: return "loop"; 820 case OPCODE_RET: return "ret"; 821 case OPCODE_ENDLOOP: return "endloop"; 822 case OPCODE_LABEL: return "label"; 823 case OPCODE_DCL: return "dcl"; 824 case OPCODE_POWX: return "powx"; 825 case OPCODE_CRS: return "crs"; 826 case OPCODE_SGN: return "sgn"; 827 case OPCODE_ISGN: return "isgn"; 828 case OPCODE_ABS: return "abs"; 829 case OPCODE_IABS: return "iabs"; 830 case OPCODE_NRM2: return "nrm2"; 831 case OPCODE_NRM3: return "nrm3"; 832 case OPCODE_NRM4: return "nrm4"; 833 case OPCODE_SINCOS: return "sincos"; 834 case OPCODE_REP: return "rep"; 835 case OPCODE_ENDREP: return "endrep"; 836 case OPCODE_IF: return "if"; 837 case OPCODE_IFC: return "ifc"; 838 case OPCODE_ELSE: return "else"; 839 case OPCODE_ENDIF: return "endif"; 840 case OPCODE_BREAK: return "break"; 841 case OPCODE_BREAKC: return "breakc"; 842 case OPCODE_MOVA: return "mova"; 843 case OPCODE_DEFB: return "defb"; 844 case OPCODE_DEFI: return "defi"; 845 case OPCODE_TEXCOORD: return "texcoord"; 846 case OPCODE_TEXKILL: return "texkill"; 847 case OPCODE_DISCARD: return "discard"; 848 case OPCODE_TEX: 849 if(version < 0x0104) return "tex"; 850 else return "texld"; 851 case OPCODE_TEXBEM: return "texbem"; 852 case OPCODE_TEXBEML: return "texbeml"; 853 case OPCODE_TEXREG2AR: return "texreg2ar"; 854 case OPCODE_TEXREG2GB: return "texreg2gb"; 855 case OPCODE_TEXM3X2PAD: return "texm3x2pad"; 856 case OPCODE_TEXM3X2TEX: return "texm3x2tex"; 857 case OPCODE_TEXM3X3PAD: return "texm3x3pad"; 858 case OPCODE_TEXM3X3TEX: return "texm3x3tex"; 859 case OPCODE_RESERVED0: return "reserved0"; 860 case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; 861 case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; 862 case OPCODE_EXPP: return "expp"; 863 case OPCODE_LOGP: return "logp"; 864 case OPCODE_CND: return "cnd"; 865 case OPCODE_DEF: return "def"; 866 case OPCODE_TEXREG2RGB: return "texreg2rgb"; 867 case OPCODE_TEXDP3TEX: return "texdp3tex"; 868 case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; 869 case OPCODE_TEXDP3: return "texdp3"; 870 case OPCODE_TEXM3X3: return "texm3x3"; 871 case OPCODE_TEXDEPTH: return "texdepth"; 872 case OPCODE_CMP0: return "cmp0"; 873 case OPCODE_ICMP: return "icmp"; 874 case OPCODE_UCMP: return "ucmp"; 875 case OPCODE_SELECT: return "select"; 876 case OPCODE_EXTRACT: return "extract"; 877 case OPCODE_INSERT: return "insert"; 878 case OPCODE_BEM: return "bem"; 879 case OPCODE_DP2ADD: return "dp2add"; 880 case OPCODE_DFDX: return "dFdx"; 881 case OPCODE_DFDY: return "dFdy"; 882 case OPCODE_FWIDTH: return "fwidth"; 883 case OPCODE_TEXLDD: return "texldd"; 884 case OPCODE_CMP: return "cmp"; 885 case OPCODE_TEXLDL: return "texldl"; 886 case OPCODE_TEXOFFSET: return "texoffset"; 887 case OPCODE_TEXLDLOFFSET: return "texldloffset"; 888 case OPCODE_TEXELFETCH: return "texelfetch"; 889 case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset"; 890 case OPCODE_TEXGRAD: return "texgrad"; 891 case OPCODE_TEXGRADOFFSET: return "texgradoffset"; 892 case OPCODE_BREAKP: return "breakp"; 893 case OPCODE_TEXSIZE: return "texsize"; 894 case OPCODE_PHASE: return "phase"; 895 case OPCODE_COMMENT: return "comment"; 896 case OPCODE_END: return "end"; 897 case OPCODE_PS_1_0: return "ps_1_0"; 898 case OPCODE_PS_1_1: return "ps_1_1"; 899 case OPCODE_PS_1_2: return "ps_1_2"; 900 case OPCODE_PS_1_3: return "ps_1_3"; 901 case OPCODE_PS_1_4: return "ps_1_4"; 902 case OPCODE_PS_2_0: return "ps_2_0"; 903 case OPCODE_PS_2_x: return "ps_2_x"; 904 case OPCODE_PS_3_0: return "ps_3_0"; 905 case OPCODE_VS_1_0: return "vs_1_0"; 906 case OPCODE_VS_1_1: return "vs_1_1"; 907 case OPCODE_VS_2_0: return "vs_2_0"; 908 case OPCODE_VS_2_x: return "vs_2_x"; 909 case OPCODE_VS_2_sw: return "vs_2_sw"; 910 case OPCODE_VS_3_0: return "vs_3_0"; 911 case OPCODE_VS_3_sw: return "vs_3_sw"; 912 case OPCODE_WHILE: return "while"; 913 case OPCODE_ENDWHILE: return "endwhile"; 914 case OPCODE_COS: return "cos"; 915 case OPCODE_SIN: return "sin"; 916 case OPCODE_TAN: return "tan"; 917 case OPCODE_ACOS: return "acos"; 918 case OPCODE_ASIN: return "asin"; 919 case OPCODE_ATAN: return "atan"; 920 case OPCODE_ATAN2: return "atan2"; 921 case OPCODE_COSH: return "cosh"; 922 case OPCODE_SINH: return "sinh"; 923 case OPCODE_TANH: return "tanh"; 924 case OPCODE_ACOSH: return "acosh"; 925 case OPCODE_ASINH: return "asinh"; 926 case OPCODE_ATANH: return "atanh"; 927 case OPCODE_DP1: return "dp1"; 928 case OPCODE_DP2: return "dp2"; 929 case OPCODE_TRUNC: return "trunc"; 930 case OPCODE_FLOOR: return "floor"; 931 case OPCODE_ROUND: return "round"; 932 case OPCODE_ROUNDEVEN: return "roundEven"; 933 case OPCODE_CEIL: return "ceil"; 934 case OPCODE_EXP2: return "exp2"; 935 case OPCODE_LOG2: return "log2"; 936 case OPCODE_EXP: return "exp"; 937 case OPCODE_LOG: return "log"; 938 case OPCODE_POW: return "pow"; 939 case OPCODE_F2B: return "f2b"; 940 case OPCODE_B2F: return "b2f"; 941 case OPCODE_F2I: return "f2i"; 942 case OPCODE_I2F: return "i2f"; 943 case OPCODE_F2U: return "f2u"; 944 case OPCODE_U2F: return "u2f"; 945 case OPCODE_B2I: return "b2i"; 946 case OPCODE_I2B: return "i2b"; 947 case OPCODE_ALL: return "all"; 948 case OPCODE_ANY: return "any"; 949 case OPCODE_NEG: return "neg"; 950 case OPCODE_INEG: return "ineg"; 951 case OPCODE_ISNAN: return "isnan"; 952 case OPCODE_ISINF: return "isinf"; 953 case OPCODE_NOT: return "not"; 954 case OPCODE_OR: return "or"; 955 case OPCODE_XOR: return "xor"; 956 case OPCODE_AND: return "and"; 957 case OPCODE_EQ: return "eq"; 958 case OPCODE_NE: return "neq"; 959 case OPCODE_FORWARD1: return "forward1"; 960 case OPCODE_FORWARD2: return "forward2"; 961 case OPCODE_FORWARD3: return "forward3"; 962 case OPCODE_FORWARD4: return "forward4"; 963 case OPCODE_REFLECT1: return "reflect1"; 964 case OPCODE_REFLECT2: return "reflect2"; 965 case OPCODE_REFLECT3: return "reflect3"; 966 case OPCODE_REFLECT4: return "reflect4"; 967 case OPCODE_REFRACT1: return "refract1"; 968 case OPCODE_REFRACT2: return "refract2"; 969 case OPCODE_REFRACT3: return "refract3"; 970 case OPCODE_REFRACT4: return "refract4"; 971 case OPCODE_LEAVE: return "leave"; 972 case OPCODE_CONTINUE: return "continue"; 973 case OPCODE_TEST: return "test"; 974 case OPCODE_SWITCH: return "switch"; 975 case OPCODE_ENDSWITCH: return "endswitch"; 976 default: 977 ASSERT(false); 978 } 979 980 return "<unknown>"; 981 } 982 controlString() const983 std::string Shader::Instruction::controlString() const 984 { 985 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP) 986 { 987 if(project) return "p"; 988 989 if(bias) return "b"; 990 991 // FIXME: LOD 992 } 993 994 switch(control) 995 { 996 case 1: return "_gt"; 997 case 2: return "_eq"; 998 case 3: return "_ge"; 999 case 4: return "_lt"; 1000 case 5: return "_ne"; 1001 case 6: return "_le"; 1002 default: 1003 return ""; 1004 // ASSERT(false); // FIXME 1005 } 1006 } 1007 string(ShaderType shaderType,unsigned short version) const1008 std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const 1009 { 1010 std::ostringstream buffer; 1011 1012 if(type == PARAMETER_FLOAT4LITERAL) 1013 { 1014 buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}'; 1015 1016 return buffer.str(); 1017 } 1018 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) 1019 { 1020 buffer << index; 1021 1022 return typeString(shaderType, version) + buffer.str(); 1023 } 1024 else 1025 { 1026 return typeString(shaderType, version); 1027 } 1028 } 1029 typeString(ShaderType shaderType,unsigned short version) const1030 std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const 1031 { 1032 switch(type) 1033 { 1034 case PARAMETER_TEMP: return "r"; 1035 case PARAMETER_INPUT: return "v"; 1036 case PARAMETER_CONST: return "c"; 1037 case PARAMETER_TEXTURE: 1038 // case PARAMETER_ADDR: 1039 if(shaderType == SHADER_PIXEL) return "t"; 1040 else return "a0"; 1041 case PARAMETER_RASTOUT: 1042 if(index == 0) return "oPos"; 1043 else if(index == 1) return "oFog"; 1044 else if(index == 2) return "oPts"; 1045 else ASSERT(false); 1046 case PARAMETER_ATTROUT: return "oD"; 1047 case PARAMETER_TEXCRDOUT: 1048 // case PARAMETER_OUTPUT: return ""; 1049 if(version < 0x0300) return "oT"; 1050 else return "o"; 1051 case PARAMETER_CONSTINT: return "i"; 1052 case PARAMETER_COLOROUT: return "oC"; 1053 case PARAMETER_DEPTHOUT: return "oDepth"; 1054 case PARAMETER_SAMPLER: return "s"; 1055 // case PARAMETER_CONST2: return ""; 1056 // case PARAMETER_CONST3: return ""; 1057 // case PARAMETER_CONST4: return ""; 1058 case PARAMETER_CONSTBOOL: return "b"; 1059 case PARAMETER_LOOP: return "aL"; 1060 // case PARAMETER_TEMPFLOAT16: return ""; 1061 case PARAMETER_MISCTYPE: 1062 switch(index) 1063 { 1064 case VPosIndex: return "vPos"; 1065 case VFaceIndex: return "vFace"; 1066 case InstanceIDIndex: return "iID"; 1067 case VertexIDIndex: return "vID"; 1068 default: ASSERT(false); 1069 } 1070 case PARAMETER_LABEL: return "l"; 1071 case PARAMETER_PREDICATE: return "p0"; 1072 case PARAMETER_FLOAT4LITERAL: return ""; 1073 case PARAMETER_BOOL1LITERAL: return ""; 1074 case PARAMETER_INT4LITERAL: return ""; 1075 // case PARAMETER_VOID: return ""; 1076 default: 1077 ASSERT(false); 1078 } 1079 1080 return ""; 1081 } 1082 isBranch() const1083 bool Shader::Instruction::isBranch() const 1084 { 1085 return opcode == OPCODE_IF || opcode == OPCODE_IFC; 1086 } 1087 isCall() const1088 bool Shader::Instruction::isCall() const 1089 { 1090 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ; 1091 } 1092 isBreak() const1093 bool Shader::Instruction::isBreak() const 1094 { 1095 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP; 1096 } 1097 isLoop() const1098 bool Shader::Instruction::isLoop() const 1099 { 1100 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE; 1101 } 1102 isEndLoop() const1103 bool Shader::Instruction::isEndLoop() const 1104 { 1105 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE; 1106 } 1107 isPredicated() const1108 bool Shader::Instruction::isPredicated() const 1109 { 1110 return predicate || 1111 analysisBranch || 1112 analysisBreak || 1113 analysisContinue || 1114 analysisLeave; 1115 } 1116 Shader()1117 Shader::Shader() : serialID(serialCounter++) 1118 { 1119 usedSamplers = 0; 1120 } 1121 ~Shader()1122 Shader::~Shader() 1123 { 1124 for(unsigned int i = 0; i < instruction.size(); i++) 1125 { 1126 delete instruction[i]; 1127 instruction[i] = 0; 1128 } 1129 } 1130 parse(const unsigned long * token)1131 void Shader::parse(const unsigned long *token) 1132 { 1133 minorVersion = (unsigned char)(token[0] & 0x000000FF); 1134 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); 1135 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); 1136 1137 int length = 0; 1138 1139 if(shaderType == SHADER_VERTEX) 1140 { 1141 length = VertexShader::validate(token); 1142 } 1143 else if(shaderType == SHADER_PIXEL) 1144 { 1145 length = PixelShader::validate(token); 1146 } 1147 else ASSERT(false); 1148 1149 ASSERT(length != 0); 1150 instruction.resize(length); 1151 1152 for(int i = 0; i < length; i++) 1153 { 1154 while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token 1155 { 1156 int length = (*token & 0x7FFF0000) >> 16; 1157 1158 token += length + 1; 1159 } 1160 1161 int tokenCount = size(*token); 1162 1163 instruction[i] = new Instruction(token, tokenCount, majorVersion); 1164 1165 token += 1 + tokenCount; 1166 } 1167 } 1168 size(unsigned long opcode) const1169 int Shader::size(unsigned long opcode) const 1170 { 1171 return size(opcode, version); 1172 } 1173 size(unsigned long opcode,unsigned short version)1174 int Shader::size(unsigned long opcode, unsigned short version) 1175 { 1176 if(version > 0x0300) 1177 { 1178 ASSERT(false); 1179 } 1180 1181 static const signed char size[] = 1182 { 1183 0, // NOP = 0 1184 2, // MOV 1185 3, // ADD 1186 3, // SUB 1187 4, // MAD 1188 3, // MUL 1189 2, // RCP 1190 2, // RSQ 1191 3, // DP3 1192 3, // DP4 1193 3, // MIN 1194 3, // MAX 1195 3, // SLT 1196 3, // SGE 1197 2, // EXP 1198 2, // LOG 1199 2, // LIT 1200 3, // DST 1201 4, // LRP 1202 2, // FRC 1203 3, // M4x4 1204 3, // M4x3 1205 3, // M3x4 1206 3, // M3x3 1207 3, // M3x2 1208 1, // CALL 1209 2, // CALLNZ 1210 2, // LOOP 1211 0, // RET 1212 0, // ENDLOOP 1213 1, // LABEL 1214 2, // DCL 1215 3, // POW 1216 3, // CRS 1217 4, // SGN 1218 2, // ABS 1219 2, // NRM 1220 4, // SINCOS 1221 1, // REP 1222 0, // ENDREP 1223 1, // IF 1224 2, // IFC 1225 0, // ELSE 1226 0, // ENDIF 1227 0, // BREAK 1228 2, // BREAKC 1229 2, // MOVA 1230 2, // DEFB 1231 5, // DEFI 1232 -1, // 49 1233 -1, // 50 1234 -1, // 51 1235 -1, // 52 1236 -1, // 53 1237 -1, // 54 1238 -1, // 55 1239 -1, // 56 1240 -1, // 57 1241 -1, // 58 1242 -1, // 59 1243 -1, // 60 1244 -1, // 61 1245 -1, // 62 1246 -1, // 63 1247 1, // TEXCOORD = 64 1248 1, // TEXKILL 1249 1, // TEX 1250 2, // TEXBEM 1251 2, // TEXBEML 1252 2, // TEXREG2AR 1253 2, // TEXREG2GB 1254 2, // TEXM3x2PAD 1255 2, // TEXM3x2TEX 1256 2, // TEXM3x3PAD 1257 2, // TEXM3x3TEX 1258 -1, // RESERVED0 1259 3, // TEXM3x3SPEC 1260 2, // TEXM3x3VSPEC 1261 2, // EXPP 1262 2, // LOGP 1263 4, // CND 1264 5, // DEF 1265 2, // TEXREG2RGB 1266 2, // TEXDP3TEX 1267 2, // TEXM3x2DEPTH 1268 2, // TEXDP3 1269 2, // TEXM3x3 1270 1, // TEXDEPTH 1271 4, // CMP 1272 3, // BEM 1273 4, // DP2ADD 1274 2, // DSX 1275 2, // DSY 1276 5, // TEXLDD 1277 3, // SETP 1278 3, // TEXLDL 1279 2, // BREAKP 1280 -1, // 97 1281 -1, // 98 1282 -1, // 99 1283 -1, // 100 1284 -1, // 101 1285 -1, // 102 1286 -1, // 103 1287 -1, // 104 1288 -1, // 105 1289 -1, // 106 1290 -1, // 107 1291 -1, // 108 1292 -1, // 109 1293 -1, // 110 1294 -1, // 111 1295 -1, // 112 1296 }; 1297 1298 int length = 0; 1299 1300 if((opcode & 0x0000FFFF) == OPCODE_COMMENT) 1301 { 1302 return (opcode & 0x7FFF0000) >> 16; 1303 } 1304 1305 if(opcode != OPCODE_PS_1_0 && 1306 opcode != OPCODE_PS_1_1 && 1307 opcode != OPCODE_PS_1_2 && 1308 opcode != OPCODE_PS_1_3 && 1309 opcode != OPCODE_PS_1_4 && 1310 opcode != OPCODE_PS_2_0 && 1311 opcode != OPCODE_PS_2_x && 1312 opcode != OPCODE_PS_3_0 && 1313 opcode != OPCODE_VS_1_0 && 1314 opcode != OPCODE_VS_1_1 && 1315 opcode != OPCODE_VS_2_0 && 1316 opcode != OPCODE_VS_2_x && 1317 opcode != OPCODE_VS_2_sw && 1318 opcode != OPCODE_VS_3_0 && 1319 opcode != OPCODE_VS_3_sw && 1320 opcode != OPCODE_PHASE && 1321 opcode != OPCODE_END) 1322 { 1323 if(version >= 0x0200) 1324 { 1325 length = (opcode & 0x0F000000) >> 24; 1326 } 1327 else 1328 { 1329 length = size[opcode & 0x0000FFFF]; 1330 } 1331 } 1332 1333 if(length < 0) 1334 { 1335 ASSERT(false); 1336 } 1337 1338 if(version == 0x0104) 1339 { 1340 switch(opcode & 0x0000FFFF) 1341 { 1342 case OPCODE_TEX: 1343 length += 1; 1344 break; 1345 case OPCODE_TEXCOORD: 1346 length += 1; 1347 break; 1348 default: 1349 break; 1350 } 1351 } 1352 1353 return length; 1354 } 1355 maskContainsComponent(int mask,int component)1356 bool Shader::maskContainsComponent(int mask, int component) 1357 { 1358 return (mask & (1 << component)) != 0; 1359 } 1360 swizzleContainsComponent(int swizzle,int component)1361 bool Shader::swizzleContainsComponent(int swizzle, int component) 1362 { 1363 if((swizzle & 0x03) >> 0 == component) return true; 1364 if((swizzle & 0x0C) >> 2 == component) return true; 1365 if((swizzle & 0x30) >> 4 == component) return true; 1366 if((swizzle & 0xC0) >> 6 == component) return true; 1367 1368 return false; 1369 } 1370 swizzleContainsComponentMasked(int swizzle,int component,int mask)1371 bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask) 1372 { 1373 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true; 1374 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true; 1375 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true; 1376 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true; 1377 1378 return false; 1379 } 1380 containsDynamicBranching() const1381 bool Shader::containsDynamicBranching() const 1382 { 1383 return dynamicBranching; 1384 } 1385 containsBreakInstruction() const1386 bool Shader::containsBreakInstruction() const 1387 { 1388 return containsBreak; 1389 } 1390 containsContinueInstruction() const1391 bool Shader::containsContinueInstruction() const 1392 { 1393 return containsContinue; 1394 } 1395 containsLeaveInstruction() const1396 bool Shader::containsLeaveInstruction() const 1397 { 1398 return containsLeave; 1399 } 1400 containsDefineInstruction() const1401 bool Shader::containsDefineInstruction() const 1402 { 1403 return containsDefine; 1404 } 1405 usesSampler(int index) const1406 bool Shader::usesSampler(int index) const 1407 { 1408 return (usedSamplers & (1 << index)) != 0; 1409 } 1410 getSerialID() const1411 int Shader::getSerialID() const 1412 { 1413 return serialID; 1414 } 1415 getLength() const1416 size_t Shader::getLength() const 1417 { 1418 return instruction.size(); 1419 } 1420 getShaderType() const1421 Shader::ShaderType Shader::getShaderType() const 1422 { 1423 return shaderType; 1424 } 1425 getVersion() const1426 unsigned short Shader::getVersion() const 1427 { 1428 return version; 1429 } 1430 print(const char * fileName,...) const1431 void Shader::print(const char *fileName, ...) const 1432 { 1433 char fullName[1024 + 1]; 1434 1435 va_list vararg; 1436 va_start(vararg, fileName); 1437 vsnprintf(fullName, 1024, fileName, vararg); 1438 va_end(vararg); 1439 1440 std::ofstream file(fullName, std::ofstream::out); 1441 1442 for(unsigned int i = 0; i < instruction.size(); i++) 1443 { 1444 file << instruction[i]->string(shaderType, version) << std::endl; 1445 } 1446 } 1447 printInstruction(int index,const char * fileName) const1448 void Shader::printInstruction(int index, const char *fileName) const 1449 { 1450 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app); 1451 1452 file << instruction[index]->string(shaderType, version) << std::endl; 1453 } 1454 append(Instruction * instruction)1455 void Shader::append(Instruction *instruction) 1456 { 1457 this->instruction.push_back(instruction); 1458 } 1459 declareSampler(int i)1460 void Shader::declareSampler(int i) 1461 { 1462 usedSamplers |= 1 << i; 1463 } 1464 getInstruction(size_t i) const1465 const Shader::Instruction *Shader::getInstruction(size_t i) const 1466 { 1467 ASSERT(i < instruction.size()); 1468 1469 return instruction[i]; 1470 } 1471 optimize()1472 void Shader::optimize() 1473 { 1474 optimizeLeave(); 1475 optimizeCall(); 1476 removeNull(); 1477 } 1478 optimizeLeave()1479 void Shader::optimizeLeave() 1480 { 1481 // A return (leave) right before the end of a function or the shader can be removed 1482 for(unsigned int i = 0; i < instruction.size(); i++) 1483 { 1484 if(instruction[i]->opcode == OPCODE_LEAVE) 1485 { 1486 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET) 1487 { 1488 instruction[i]->opcode = OPCODE_NULL; 1489 } 1490 } 1491 } 1492 } 1493 optimizeCall()1494 void Shader::optimizeCall() 1495 { 1496 // Eliminate uncalled functions 1497 std::set<int> calledFunctions; 1498 bool rescan = true; 1499 1500 while(rescan) 1501 { 1502 calledFunctions.clear(); 1503 rescan = false; 1504 1505 for(unsigned int i = 0; i < instruction.size(); i++) 1506 { 1507 if(instruction[i]->isCall()) 1508 { 1509 calledFunctions.insert(instruction[i]->dst.label); 1510 } 1511 } 1512 1513 if(!calledFunctions.empty()) 1514 { 1515 for(unsigned int i = 0; i < instruction.size(); i++) 1516 { 1517 if(instruction[i]->opcode == OPCODE_LABEL) 1518 { 1519 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end()) 1520 { 1521 for( ; i < instruction.size(); i++) 1522 { 1523 Opcode oldOpcode = instruction[i]->opcode; 1524 instruction[i]->opcode = OPCODE_NULL; 1525 1526 if(oldOpcode == OPCODE_RET) 1527 { 1528 rescan = true; 1529 break; 1530 } 1531 } 1532 } 1533 } 1534 } 1535 } 1536 } 1537 1538 // Optimize the entry call 1539 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET) 1540 { 1541 if(calledFunctions.size() == 1) 1542 { 1543 instruction[0]->opcode = OPCODE_NULL; 1544 instruction[1]->opcode = OPCODE_NULL; 1545 1546 for(size_t i = 2; i < instruction.size(); i++) 1547 { 1548 if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET) 1549 { 1550 instruction[i]->opcode = OPCODE_NULL; 1551 } 1552 } 1553 } 1554 } 1555 } 1556 removeNull()1557 void Shader::removeNull() 1558 { 1559 size_t size = 0; 1560 for(size_t i = 0; i < instruction.size(); i++) 1561 { 1562 if(instruction[i]->opcode != OPCODE_NULL) 1563 { 1564 instruction[size] = instruction[i]; 1565 size++; 1566 } 1567 else 1568 { 1569 delete instruction[i]; 1570 } 1571 } 1572 1573 instruction.resize(size); 1574 } 1575 analyzeDirtyConstants()1576 void Shader::analyzeDirtyConstants() 1577 { 1578 dirtyConstantsF = 0; 1579 dirtyConstantsI = 0; 1580 dirtyConstantsB = 0; 1581 1582 for(unsigned int i = 0; i < instruction.size(); i++) 1583 { 1584 switch(instruction[i]->opcode) 1585 { 1586 case OPCODE_DEF: 1587 if(instruction[i]->dst.index + 1 > dirtyConstantsF) 1588 { 1589 dirtyConstantsF = instruction[i]->dst.index + 1; 1590 } 1591 break; 1592 case OPCODE_DEFI: 1593 if(instruction[i]->dst.index + 1 > dirtyConstantsI) 1594 { 1595 dirtyConstantsI = instruction[i]->dst.index + 1; 1596 } 1597 break; 1598 case OPCODE_DEFB: 1599 if(instruction[i]->dst.index + 1 > dirtyConstantsB) 1600 { 1601 dirtyConstantsB = instruction[i]->dst.index + 1; 1602 } 1603 break; 1604 default: 1605 break; 1606 } 1607 } 1608 } 1609 analyzeDynamicBranching()1610 void Shader::analyzeDynamicBranching() 1611 { 1612 dynamicBranching = false; 1613 containsLeave = false; 1614 containsBreak = false; 1615 containsContinue = false; 1616 containsDefine = false; 1617 1618 // Determine global presence of branching instructions 1619 for(unsigned int i = 0; i < instruction.size(); i++) 1620 { 1621 switch(instruction[i]->opcode) 1622 { 1623 case OPCODE_CALLNZ: 1624 case OPCODE_IF: 1625 case OPCODE_IFC: 1626 case OPCODE_BREAK: 1627 case OPCODE_BREAKC: 1628 case OPCODE_CMP: 1629 case OPCODE_BREAKP: 1630 case OPCODE_LEAVE: 1631 case OPCODE_CONTINUE: 1632 if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL) 1633 { 1634 dynamicBranching = true; 1635 } 1636 1637 if(instruction[i]->opcode == OPCODE_LEAVE) 1638 { 1639 containsLeave = true; 1640 } 1641 1642 if(instruction[i]->isBreak()) 1643 { 1644 containsBreak = true; 1645 } 1646 1647 if(instruction[i]->opcode == OPCODE_CONTINUE) 1648 { 1649 containsContinue = true; 1650 } 1651 case OPCODE_DEF: 1652 case OPCODE_DEFB: 1653 case OPCODE_DEFI: 1654 containsDefine = true; 1655 default: 1656 break; 1657 } 1658 } 1659 1660 // Conservatively determine which instructions are affected by dynamic branching 1661 int branchDepth = 0; 1662 int breakDepth = 0; 1663 int continueDepth = 0; 1664 bool leaveReturn = false; 1665 unsigned int functionBegin = 0; 1666 1667 for(unsigned int i = 0; i < instruction.size(); i++) 1668 { 1669 // If statements 1670 if(instruction[i]->isBranch()) 1671 { 1672 branchDepth++; 1673 } 1674 else if(instruction[i]->opcode == OPCODE_ENDIF) 1675 { 1676 branchDepth--; 1677 } 1678 1679 if(branchDepth > 0) 1680 { 1681 instruction[i]->analysisBranch = true; 1682 1683 if(instruction[i]->isCall()) 1684 { 1685 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1686 } 1687 } 1688 1689 // Break statemement 1690 if(instruction[i]->isBreak()) 1691 { 1692 breakDepth++; 1693 } 1694 1695 if(breakDepth > 0) 1696 { 1697 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask 1698 { 1699 breakDepth++; 1700 } 1701 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) 1702 { 1703 breakDepth--; 1704 } 1705 1706 instruction[i]->analysisBreak = true; 1707 1708 if(instruction[i]->isCall()) 1709 { 1710 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1711 } 1712 } 1713 1714 // Continue statement 1715 if(instruction[i]->opcode == OPCODE_CONTINUE) 1716 { 1717 continueDepth++; 1718 } 1719 1720 if(continueDepth > 0) 1721 { 1722 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask 1723 { 1724 continueDepth++; 1725 } 1726 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) 1727 { 1728 continueDepth--; 1729 } 1730 1731 instruction[i]->analysisContinue = true; 1732 1733 if(instruction[i]->isCall()) 1734 { 1735 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE); 1736 } 1737 } 1738 1739 // Return (leave) statement 1740 if(instruction[i]->opcode == OPCODE_LEAVE) 1741 { 1742 leaveReturn = true; 1743 1744 // Mark loop body instructions prior to the return statement 1745 for(unsigned int l = functionBegin; l < i; l++) 1746 { 1747 if(instruction[l]->isLoop()) 1748 { 1749 for(unsigned int r = l + 1; r < i; r++) 1750 { 1751 instruction[r]->analysisLeave = true; 1752 } 1753 1754 break; 1755 } 1756 } 1757 } 1758 else if(instruction[i]->opcode == OPCODE_RET) // End of the function 1759 { 1760 leaveReturn = false; 1761 } 1762 else if(instruction[i]->opcode == OPCODE_LABEL) 1763 { 1764 functionBegin = i; 1765 } 1766 1767 if(leaveReturn) 1768 { 1769 instruction[i]->analysisLeave = true; 1770 1771 if(instruction[i]->isCall()) 1772 { 1773 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE); 1774 } 1775 } 1776 } 1777 } 1778 markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1779 void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag) 1780 { 1781 bool marker = false; 1782 for(unsigned int i = 0; i < instruction.size(); i++) 1783 { 1784 if(!marker) 1785 { 1786 if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel) 1787 { 1788 marker = true; 1789 } 1790 } 1791 else 1792 { 1793 if(instruction[i]->opcode == OPCODE_RET) 1794 { 1795 break; 1796 } 1797 else if(instruction[i]->isCall()) 1798 { 1799 markFunctionAnalysis(instruction[i]->dst.label, flag); 1800 } 1801 1802 instruction[i]->analysis |= flag; 1803 } 1804 } 1805 } 1806 analyzeSamplers()1807 void Shader::analyzeSamplers() 1808 { 1809 for(unsigned int i = 0; i < instruction.size(); i++) 1810 { 1811 switch(instruction[i]->opcode) 1812 { 1813 case OPCODE_TEX: 1814 case OPCODE_TEXBEM: 1815 case OPCODE_TEXBEML: 1816 case OPCODE_TEXREG2AR: 1817 case OPCODE_TEXREG2GB: 1818 case OPCODE_TEXM3X2TEX: 1819 case OPCODE_TEXM3X3TEX: 1820 case OPCODE_TEXM3X3SPEC: 1821 case OPCODE_TEXM3X3VSPEC: 1822 case OPCODE_TEXREG2RGB: 1823 case OPCODE_TEXDP3TEX: 1824 case OPCODE_TEXM3X2DEPTH: 1825 case OPCODE_TEXLDD: 1826 case OPCODE_TEXLDL: 1827 case OPCODE_TEXOFFSET: 1828 case OPCODE_TEXLDLOFFSET: 1829 case OPCODE_TEXELFETCH: 1830 case OPCODE_TEXELFETCHOFFSET: 1831 case OPCODE_TEXGRAD: 1832 case OPCODE_TEXGRADOFFSET: 1833 { 1834 Parameter &dst = instruction[i]->dst; 1835 Parameter &src1 = instruction[i]->src[1]; 1836 1837 if(majorVersion >= 2) 1838 { 1839 usedSamplers |= 1 << src1.index; 1840 } 1841 else 1842 { 1843 usedSamplers |= 1 << dst.index; 1844 } 1845 } 1846 break; 1847 default: 1848 break; 1849 } 1850 } 1851 } 1852 1853 // Assigns a unique index to each call instruction, on a per label basis. 1854 // This is used to know what basic block to return to. analyzeCallSites()1855 void Shader::analyzeCallSites() 1856 { 1857 int callSiteIndex[2048] = {0}; 1858 1859 for(unsigned int i = 0; i < instruction.size(); i++) 1860 { 1861 if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ) 1862 { 1863 int label = instruction[i]->dst.label; 1864 1865 instruction[i]->dst.callSite = callSiteIndex[label]++; 1866 } 1867 } 1868 } 1869 analyzeDynamicIndexing()1870 void Shader::analyzeDynamicIndexing() 1871 { 1872 dynamicallyIndexedTemporaries = false; 1873 dynamicallyIndexedInput = false; 1874 dynamicallyIndexedOutput = false; 1875 1876 for(unsigned int i = 0; i < instruction.size(); i++) 1877 { 1878 if(instruction[i]->dst.rel.type == PARAMETER_ADDR || 1879 instruction[i]->dst.rel.type == PARAMETER_LOOP || 1880 instruction[i]->dst.rel.type == PARAMETER_TEMP || 1881 instruction[i]->dst.rel.type == PARAMETER_CONST) 1882 { 1883 switch(instruction[i]->dst.type) 1884 { 1885 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; 1886 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; 1887 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; 1888 default: break; 1889 } 1890 } 1891 1892 for(int j = 0; j < 3; j++) 1893 { 1894 if(instruction[i]->src[j].rel.type == PARAMETER_ADDR || 1895 instruction[i]->src[j].rel.type == PARAMETER_LOOP || 1896 instruction[i]->src[j].rel.type == PARAMETER_TEMP || 1897 instruction[i]->src[j].rel.type == PARAMETER_CONST) 1898 { 1899 switch(instruction[i]->src[j].type) 1900 { 1901 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; 1902 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; 1903 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; 1904 default: break; 1905 } 1906 } 1907 } 1908 } 1909 } 1910 } 1911