1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Shader.hpp" 16 17 #include "VertexShader.hpp" 18 #include "PixelShader.hpp" 19 #include "Common/Math.hpp" 20 #include "Common/Debug.hpp" 21 22 #include <set> 23 #include <fstream> 24 #include <sstream> 25 #include <stdarg.h> 26 27 namespace sw 28 { 29 volatile int Shader::serialCounter = 1; 30 OPCODE_DP(int i)31 Shader::Opcode Shader::OPCODE_DP(int i) 32 { 33 switch(i) 34 { 35 default: ASSERT(false); 36 case 1: return OPCODE_DP1; 37 case 2: return OPCODE_DP2; 38 case 3: return OPCODE_DP3; 39 case 4: return OPCODE_DP4; 40 } 41 } 42 OPCODE_LEN(int i)43 Shader::Opcode Shader::OPCODE_LEN(int i) 44 { 45 switch(i) 46 { 47 default: ASSERT(false); 48 case 1: return OPCODE_ABS; 49 case 2: return OPCODE_LEN2; 50 case 3: return OPCODE_LEN3; 51 case 4: return OPCODE_LEN4; 52 } 53 } 54 OPCODE_DIST(int i)55 Shader::Opcode Shader::OPCODE_DIST(int i) 56 { 57 switch(i) 58 { 59 default: ASSERT(false); 60 case 1: return OPCODE_DIST1; 61 case 2: return OPCODE_DIST2; 62 case 3: return OPCODE_DIST3; 63 case 4: return OPCODE_DIST4; 64 } 65 } 66 OPCODE_NRM(int i)67 Shader::Opcode Shader::OPCODE_NRM(int i) 68 { 69 switch(i) 70 { 71 default: ASSERT(false); 72 case 1: return OPCODE_SGN; 73 case 2: return OPCODE_NRM2; 74 case 3: return OPCODE_NRM3; 75 case 4: return OPCODE_NRM4; 76 } 77 } 78 OPCODE_FORWARD(int i)79 Shader::Opcode Shader::OPCODE_FORWARD(int i) 80 { 81 switch(i) 82 { 83 default: ASSERT(false); 84 case 1: return OPCODE_FORWARD1; 85 case 2: return OPCODE_FORWARD2; 86 case 3: return OPCODE_FORWARD3; 87 case 4: return OPCODE_FORWARD4; 88 } 89 } 90 OPCODE_REFLECT(int i)91 Shader::Opcode Shader::OPCODE_REFLECT(int i) 92 { 93 switch(i) 94 { 95 default: ASSERT(false); 96 case 1: return OPCODE_REFLECT1; 97 case 2: return OPCODE_REFLECT2; 98 case 3: return OPCODE_REFLECT3; 99 case 4: return OPCODE_REFLECT4; 100 } 101 } 102 OPCODE_REFRACT(int i)103 Shader::Opcode Shader::OPCODE_REFRACT(int i) 104 { 105 switch(i) 106 { 107 default: ASSERT(false); 108 case 1: return OPCODE_REFRACT1; 109 case 2: return OPCODE_REFRACT2; 110 case 3: return OPCODE_REFRACT3; 111 case 4: return OPCODE_REFRACT4; 112 } 113 } 114 Instruction(Opcode opcode)115 Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0) 116 { 117 control = CONTROL_RESERVED0; 118 119 predicate = false; 120 predicateNot = false; 121 predicateSwizzle = 0xE4; 122 123 coissue = false; 124 samplerType = SAMPLER_UNKNOWN; 125 usage = USAGE_POSITION; 126 usageIndex = 0; 127 } 128 Instruction(const unsigned long * token,int size,unsigned char majorVersion)129 Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0) 130 { 131 parseOperationToken(*token++, majorVersion); 132 133 samplerType = SAMPLER_UNKNOWN; 134 usage = USAGE_POSITION; 135 usageIndex = 0; 136 137 if(opcode == OPCODE_IF || 138 opcode == OPCODE_IFC || 139 opcode == OPCODE_LOOP || 140 opcode == OPCODE_REP || 141 opcode == OPCODE_BREAKC || 142 opcode == OPCODE_BREAKP) // No destination operand 143 { 144 if(size > 0) parseSourceToken(0, token++, majorVersion); 145 if(size > 1) parseSourceToken(1, token++, majorVersion); 146 if(size > 2) parseSourceToken(2, token++, majorVersion); 147 if(size > 3) ASSERT(false); 148 } 149 else if(opcode == OPCODE_DCL) 150 { 151 parseDeclarationToken(*token++); 152 parseDestinationToken(token++, majorVersion); 153 } 154 else 155 { 156 if(size > 0) 157 { 158 parseDestinationToken(token, majorVersion); 159 160 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3) 161 { 162 token++; 163 size--; 164 } 165 166 token++; 167 size--; 168 } 169 170 if(predicate) 171 { 172 ASSERT(size != 0); 173 174 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT; 175 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); 176 177 token++; 178 size--; 179 } 180 181 for(int i = 0; size > 0; i++) 182 { 183 parseSourceToken(i, token, majorVersion); 184 185 token++; 186 size--; 187 188 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2) 189 { 190 token++; 191 size--; 192 } 193 } 194 } 195 } 196 ~Instruction()197 Shader::Instruction::~Instruction() 198 { 199 } 200 string(ShaderType shaderType,unsigned short version) const201 std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const 202 { 203 std::string instructionString; 204 205 if(opcode != OPCODE_DCL) 206 { 207 instructionString += coissue ? "+ " : ""; 208 209 if(predicate) 210 { 211 instructionString += predicateNot ? "(!p0" : "(p0"; 212 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle); 213 instructionString += ") "; 214 } 215 216 instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString(); 217 218 if(dst.type != PARAMETER_VOID) 219 { 220 instructionString += " " + dst.string(shaderType, version) + 221 dst.relativeString() + 222 dst.maskString(); 223 } 224 225 for(int i = 0; i < 4; i++) 226 { 227 if(src[i].type != PARAMETER_VOID) 228 { 229 instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " "; 230 instructionString += src[i].preModifierString() + 231 src[i].string(shaderType, version) + 232 src[i].relativeString() + 233 src[i].postModifierString() + 234 src[i].swizzleString(); 235 } 236 } 237 } 238 else // DCL 239 { 240 instructionString += "dcl"; 241 242 if(dst.type == PARAMETER_SAMPLER) 243 { 244 switch(samplerType) 245 { 246 case SAMPLER_UNKNOWN: instructionString += " "; break; 247 case SAMPLER_1D: instructionString += "_1d "; break; 248 case SAMPLER_2D: instructionString += "_2d "; break; 249 case SAMPLER_CUBE: instructionString += "_cube "; break; 250 case SAMPLER_VOLUME: instructionString += "_volume "; break; 251 default: 252 ASSERT(false); 253 } 254 255 instructionString += dst.string(shaderType, version); 256 } 257 else if(dst.type == PARAMETER_INPUT || 258 dst.type == PARAMETER_OUTPUT || 259 dst.type == PARAMETER_TEXTURE) 260 { 261 if(version >= 0x0300) 262 { 263 switch(usage) 264 { 265 case USAGE_POSITION: instructionString += "_position"; break; 266 case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break; 267 case USAGE_BLENDINDICES: instructionString += "_blendindices"; break; 268 case USAGE_NORMAL: instructionString += "_normal"; break; 269 case USAGE_PSIZE: instructionString += "_psize"; break; 270 case USAGE_TEXCOORD: instructionString += "_texcoord"; break; 271 case USAGE_TANGENT: instructionString += "_tangent"; break; 272 case USAGE_BINORMAL: instructionString += "_binormal"; break; 273 case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break; 274 case USAGE_POSITIONT: instructionString += "_positiont"; break; 275 case USAGE_COLOR: instructionString += "_color"; break; 276 case USAGE_FOG: instructionString += "_fog"; break; 277 case USAGE_DEPTH: instructionString += "_depth"; break; 278 case USAGE_SAMPLE: instructionString += "_sample"; break; 279 default: 280 ASSERT(false); 281 } 282 283 if(usageIndex > 0) 284 { 285 std::ostringstream buffer; 286 287 buffer << (int)usageIndex; 288 289 instructionString += buffer.str(); 290 } 291 } 292 else ASSERT(dst.type != PARAMETER_OUTPUT); 293 294 instructionString += " "; 295 296 instructionString += dst.string(shaderType, version); 297 instructionString += dst.maskString(); 298 } 299 else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace 300 { 301 instructionString += " "; 302 303 instructionString += dst.string(shaderType, version); 304 } 305 else ASSERT(false); 306 } 307 308 return instructionString; 309 } 310 modifierString() const311 std::string Shader::DestinationParameter::modifierString() const 312 { 313 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 314 { 315 return ""; 316 } 317 318 std::string modifierString; 319 320 if(saturate) 321 { 322 modifierString += "_sat"; 323 } 324 325 if(partialPrecision) 326 { 327 modifierString += "_pp"; 328 } 329 330 if(centroid) 331 { 332 modifierString += "_centroid"; 333 } 334 335 return modifierString; 336 } 337 shiftString() const338 std::string Shader::DestinationParameter::shiftString() const 339 { 340 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 341 { 342 return ""; 343 } 344 345 switch(shift) 346 { 347 case 0: return ""; 348 case 1: return "_x2"; 349 case 2: return "_x4"; 350 case 3: return "_x8"; 351 case -1: return "_d2"; 352 case -2: return "_d4"; 353 case -3: return "_d8"; 354 default: 355 return ""; 356 // ASSERT(false); // FIXME 357 } 358 } 359 maskString() const360 std::string Shader::DestinationParameter::maskString() const 361 { 362 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 363 { 364 return ""; 365 } 366 367 switch(mask) 368 { 369 case 0x0: return ""; 370 case 0x1: return ".x"; 371 case 0x2: return ".y"; 372 case 0x3: return ".xy"; 373 case 0x4: return ".z"; 374 case 0x5: return ".xz"; 375 case 0x6: return ".yz"; 376 case 0x7: return ".xyz"; 377 case 0x8: return ".w"; 378 case 0x9: return ".xw"; 379 case 0xA: return ".yw"; 380 case 0xB: return ".xyw"; 381 case 0xC: return ".zw"; 382 case 0xD: return ".xzw"; 383 case 0xE: return ".yzw"; 384 case 0xF: return ""; 385 default: 386 ASSERT(false); 387 } 388 389 return ""; 390 } 391 preModifierString() const392 std::string Shader::SourceParameter::preModifierString() const 393 { 394 if(type == PARAMETER_VOID) 395 { 396 return ""; 397 } 398 399 switch(modifier) 400 { 401 case MODIFIER_NONE: return ""; 402 case MODIFIER_NEGATE: return "-"; 403 case MODIFIER_BIAS: return ""; 404 case MODIFIER_BIAS_NEGATE: return "-"; 405 case MODIFIER_SIGN: return ""; 406 case MODIFIER_SIGN_NEGATE: return "-"; 407 case MODIFIER_COMPLEMENT: return "1-"; 408 case MODIFIER_X2: return ""; 409 case MODIFIER_X2_NEGATE: return "-"; 410 case MODIFIER_DZ: return ""; 411 case MODIFIER_DW: return ""; 412 case MODIFIER_ABS: return ""; 413 case MODIFIER_ABS_NEGATE: return "-"; 414 case MODIFIER_NOT: return "!"; 415 default: 416 ASSERT(false); 417 } 418 419 return ""; 420 } 421 relativeString() const422 std::string Shader::Parameter::relativeString() const 423 { 424 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP) 425 { 426 if(rel.type == PARAMETER_VOID) 427 { 428 return ""; 429 } 430 else if(rel.type == PARAMETER_ADDR) 431 { 432 switch(rel.swizzle & 0x03) 433 { 434 case 0: return "[a0.x]"; 435 case 1: return "[a0.y]"; 436 case 2: return "[a0.z]"; 437 case 3: return "[a0.w]"; 438 } 439 } 440 else if(rel.type == PARAMETER_TEMP) 441 { 442 std::ostringstream buffer; 443 buffer << rel.index; 444 445 switch(rel.swizzle & 0x03) 446 { 447 case 0: return "[r" + buffer.str() + ".x]"; 448 case 1: return "[r" + buffer.str() + ".y]"; 449 case 2: return "[r" + buffer.str() + ".z]"; 450 case 3: return "[r" + buffer.str() + ".w]"; 451 } 452 } 453 else if(rel.type == PARAMETER_LOOP) 454 { 455 return "[aL]"; 456 } 457 else if(rel.type == PARAMETER_CONST) 458 { 459 std::ostringstream buffer; 460 buffer << rel.index; 461 462 switch(rel.swizzle & 0x03) 463 { 464 case 0: return "[c" + buffer.str() + ".x]"; 465 case 1: return "[c" + buffer.str() + ".y]"; 466 case 2: return "[c" + buffer.str() + ".z]"; 467 case 3: return "[c" + buffer.str() + ".w]"; 468 } 469 } 470 else ASSERT(false); 471 } 472 473 return ""; 474 } 475 postModifierString() const476 std::string Shader::SourceParameter::postModifierString() const 477 { 478 if(type == PARAMETER_VOID) 479 { 480 return ""; 481 } 482 483 switch(modifier) 484 { 485 case MODIFIER_NONE: return ""; 486 case MODIFIER_NEGATE: return ""; 487 case MODIFIER_BIAS: return "_bias"; 488 case MODIFIER_BIAS_NEGATE: return "_bias"; 489 case MODIFIER_SIGN: return "_bx2"; 490 case MODIFIER_SIGN_NEGATE: return "_bx2"; 491 case MODIFIER_COMPLEMENT: return ""; 492 case MODIFIER_X2: return "_x2"; 493 case MODIFIER_X2_NEGATE: return "_x2"; 494 case MODIFIER_DZ: return "_dz"; 495 case MODIFIER_DW: return "_dw"; 496 case MODIFIER_ABS: return "_abs"; 497 case MODIFIER_ABS_NEGATE: return "_abs"; 498 case MODIFIER_NOT: return ""; 499 default: 500 ASSERT(false); 501 } 502 503 return ""; 504 } 505 string(ShaderType shaderType,unsigned short version) const506 std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const 507 { 508 if(type == PARAMETER_CONST && bufferIndex >= 0) 509 { 510 std::ostringstream buffer; 511 buffer << bufferIndex; 512 513 std::ostringstream offset; 514 offset << index; 515 516 return "cb" + buffer.str() + "[" + offset.str() + "]"; 517 } 518 else 519 { 520 return Parameter::string(shaderType, version); 521 } 522 } 523 swizzleString() const524 std::string Shader::SourceParameter::swizzleString() const 525 { 526 return Instruction::swizzleString(type, swizzle); 527 } 528 parseOperationToken(unsigned long token,unsigned char majorVersion)529 void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion) 530 { 531 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token 532 { 533 opcode = (Opcode)token; 534 535 control = CONTROL_RESERVED0; 536 predicate = false; 537 coissue = false; 538 } 539 else 540 { 541 opcode = (Opcode)(token & 0x0000FFFF); 542 control = (Control)((token & 0x00FF0000) >> 16); 543 544 int size = (token & 0x0F000000) >> 24; 545 546 predicate = (token & 0x10000000) != 0x00000000; 547 coissue = (token & 0x40000000) != 0x00000000; 548 549 if(majorVersion < 2) 550 { 551 if(size != 0) 552 { 553 ASSERT(false); // Reserved 554 } 555 } 556 557 if(majorVersion < 2) 558 { 559 if(predicate) 560 { 561 ASSERT(false); 562 } 563 } 564 565 if((token & 0x20000000) != 0x00000000) 566 { 567 ASSERT(false); // Reserved 568 } 569 570 if(majorVersion >= 2) 571 { 572 if(coissue) 573 { 574 ASSERT(false); // Reserved 575 } 576 } 577 578 if((token & 0x80000000) != 0x00000000) 579 { 580 ASSERT(false); 581 } 582 } 583 } 584 parseDeclarationToken(unsigned long token)585 void Shader::Instruction::parseDeclarationToken(unsigned long token) 586 { 587 samplerType = (SamplerType)((token & 0x78000000) >> 27); 588 usage = (Usage)(token & 0x0000001F); 589 usageIndex = (unsigned char)((token & 0x000F0000) >> 16); 590 } 591 parseDestinationToken(const unsigned long * token,unsigned char majorVersion)592 void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion) 593 { 594 dst.index = (unsigned short)(token[0] & 0x000007FF); 595 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 596 597 // TODO: Check type and index range 598 599 bool relative = (token[0] & 0x00002000) != 0x00000000; 600 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 601 dst.rel.swizzle = 0x00; 602 dst.rel.scale = 1; 603 604 if(relative && majorVersion >= 3) 605 { 606 dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 607 dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 608 } 609 else if(relative) ASSERT(false); // Reserved 610 611 if((token[0] & 0x0000C000) != 0x00000000) 612 { 613 ASSERT(false); // Reserved 614 } 615 616 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); 617 dst.saturate = (token[0] & 0x00100000) != 0; 618 dst.partialPrecision = (token[0] & 0x00200000) != 0; 619 dst.centroid = (token[0] & 0x00400000) != 0; 620 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; 621 622 if(majorVersion >= 2) 623 { 624 if(dst.shift) 625 { 626 ASSERT(false); // Reserved 627 } 628 } 629 630 if((token[0] & 0x80000000) != 0x80000000) 631 { 632 ASSERT(false); 633 } 634 } 635 parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)636 void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion) 637 { 638 // Defaults 639 src[i].index = 0; 640 src[i].type = PARAMETER_VOID; 641 src[i].modifier = MODIFIER_NONE; 642 src[i].swizzle = 0xE4; 643 src[i].rel.type = PARAMETER_VOID; 644 src[i].rel.swizzle = 0x00; 645 src[i].rel.scale = 1; 646 647 switch(opcode) 648 { 649 case OPCODE_DEF: 650 src[0].type = PARAMETER_FLOAT4LITERAL; 651 src[0].value[i] = *(float*)token; 652 break; 653 case OPCODE_DEFB: 654 src[0].type = PARAMETER_BOOL1LITERAL; 655 src[0].boolean[0] = *(int*)token; 656 break; 657 case OPCODE_DEFI: 658 src[0].type = PARAMETER_INT4LITERAL; 659 src[0].integer[i] = *(int*)token; 660 break; 661 default: 662 src[i].index = (unsigned short)(token[0] & 0x000007FF); 663 src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 664 665 // FIXME: Check type and index range 666 667 bool relative = (token[0] & 0x00002000) != 0x00000000; 668 src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 669 670 if((token[0] & 0x0000C000) != 0x00000000) 671 { 672 if(opcode != OPCODE_DEF && 673 opcode != OPCODE_DEFI && 674 opcode != OPCODE_DEFB) 675 { 676 ASSERT(false); 677 } 678 } 679 680 src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); 681 src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24); 682 683 if((token[0] & 0x80000000) != 0x80000000) 684 { 685 if(opcode != OPCODE_DEF && 686 opcode != OPCODE_DEFI && 687 opcode != OPCODE_DEFB) 688 { 689 ASSERT(false); 690 } 691 } 692 693 if(relative && majorVersion >= 2) 694 { 695 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 696 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 697 } 698 } 699 } 700 swizzleString(ParameterType type,unsigned char swizzle)701 std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle) 702 { 703 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4) 704 { 705 return ""; 706 } 707 708 int x = (swizzle & 0x03) >> 0; 709 int y = (swizzle & 0x0C) >> 2; 710 int z = (swizzle & 0x30) >> 4; 711 int w = (swizzle & 0xC0) >> 6; 712 713 std::string swizzleString = "."; 714 715 switch(x) 716 { 717 case 0: swizzleString += "x"; break; 718 case 1: swizzleString += "y"; break; 719 case 2: swizzleString += "z"; break; 720 case 3: swizzleString += "w"; break; 721 } 722 723 if(!(x == y && y == z && z == w)) 724 { 725 switch(y) 726 { 727 case 0: swizzleString += "x"; break; 728 case 1: swizzleString += "y"; break; 729 case 2: swizzleString += "z"; break; 730 case 3: swizzleString += "w"; break; 731 } 732 733 if(!(y == z && z == w)) 734 { 735 switch(z) 736 { 737 case 0: swizzleString += "x"; break; 738 case 1: swizzleString += "y"; break; 739 case 2: swizzleString += "z"; break; 740 case 3: swizzleString += "w"; break; 741 } 742 743 if(!(z == w)) 744 { 745 switch(w) 746 { 747 case 0: swizzleString += "x"; break; 748 case 1: swizzleString += "y"; break; 749 case 2: swizzleString += "z"; break; 750 case 3: swizzleString += "w"; break; 751 } 752 } 753 } 754 } 755 756 return swizzleString; 757 } 758 operationString(unsigned short version) const759 std::string Shader::Instruction::operationString(unsigned short version) const 760 { 761 switch(opcode) 762 { 763 case OPCODE_NULL: return "null"; 764 case OPCODE_NOP: return "nop"; 765 case OPCODE_MOV: return "mov"; 766 case OPCODE_ADD: return "add"; 767 case OPCODE_IADD: return "iadd"; 768 case OPCODE_SUB: return "sub"; 769 case OPCODE_ISUB: return "isub"; 770 case OPCODE_MAD: return "mad"; 771 case OPCODE_IMAD: return "imad"; 772 case OPCODE_MUL: return "mul"; 773 case OPCODE_IMUL: return "imul"; 774 case OPCODE_RCPX: return "rcpx"; 775 case OPCODE_DIV: return "div"; 776 case OPCODE_IDIV: return "idiv"; 777 case OPCODE_UDIV: return "udiv"; 778 case OPCODE_MOD: return "mod"; 779 case OPCODE_IMOD: return "imod"; 780 case OPCODE_UMOD: return "umod"; 781 case OPCODE_SHL: return "shl"; 782 case OPCODE_ISHR: return "ishr"; 783 case OPCODE_USHR: return "ushr"; 784 case OPCODE_RSQX: return "rsqx"; 785 case OPCODE_SQRT: return "sqrt"; 786 case OPCODE_RSQ: return "rsq"; 787 case OPCODE_LEN2: return "len2"; 788 case OPCODE_LEN3: return "len3"; 789 case OPCODE_LEN4: return "len4"; 790 case OPCODE_DIST1: return "dist1"; 791 case OPCODE_DIST2: return "dist2"; 792 case OPCODE_DIST3: return "dist3"; 793 case OPCODE_DIST4: return "dist4"; 794 case OPCODE_DP3: return "dp3"; 795 case OPCODE_DP4: return "dp4"; 796 case OPCODE_DET2: return "det2"; 797 case OPCODE_DET3: return "det3"; 798 case OPCODE_DET4: return "det4"; 799 case OPCODE_MIN: return "min"; 800 case OPCODE_IMIN: return "imin"; 801 case OPCODE_UMIN: return "umin"; 802 case OPCODE_MAX: return "max"; 803 case OPCODE_IMAX: return "imax"; 804 case OPCODE_UMAX: return "umax"; 805 case OPCODE_SLT: return "slt"; 806 case OPCODE_SGE: return "sge"; 807 case OPCODE_EXP2X: return "exp2x"; 808 case OPCODE_LOG2X: return "log2x"; 809 case OPCODE_LIT: return "lit"; 810 case OPCODE_ATT: return "att"; 811 case OPCODE_LRP: return "lrp"; 812 case OPCODE_STEP: return "step"; 813 case OPCODE_SMOOTH: return "smooth"; 814 case OPCODE_FLOATBITSTOINT: return "floatBitsToInt"; 815 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt"; 816 case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat"; 817 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat"; 818 case OPCODE_PACKSNORM2x16: return "packSnorm2x16"; 819 case OPCODE_PACKUNORM2x16: return "packUnorm2x16"; 820 case OPCODE_PACKHALF2x16: return "packHalf2x16"; 821 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16"; 822 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16"; 823 case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16"; 824 case OPCODE_FRC: return "frc"; 825 case OPCODE_M4X4: return "m4x4"; 826 case OPCODE_M4X3: return "m4x3"; 827 case OPCODE_M3X4: return "m3x4"; 828 case OPCODE_M3X3: return "m3x3"; 829 case OPCODE_M3X2: return "m3x2"; 830 case OPCODE_CALL: return "call"; 831 case OPCODE_CALLNZ: return "callnz"; 832 case OPCODE_LOOP: return "loop"; 833 case OPCODE_RET: return "ret"; 834 case OPCODE_ENDLOOP: return "endloop"; 835 case OPCODE_LABEL: return "label"; 836 case OPCODE_DCL: return "dcl"; 837 case OPCODE_POWX: return "powx"; 838 case OPCODE_CRS: return "crs"; 839 case OPCODE_SGN: return "sgn"; 840 case OPCODE_ISGN: return "isgn"; 841 case OPCODE_ABS: return "abs"; 842 case OPCODE_IABS: return "iabs"; 843 case OPCODE_NRM2: return "nrm2"; 844 case OPCODE_NRM3: return "nrm3"; 845 case OPCODE_NRM4: return "nrm4"; 846 case OPCODE_SINCOS: return "sincos"; 847 case OPCODE_REP: return "rep"; 848 case OPCODE_ENDREP: return "endrep"; 849 case OPCODE_IF: return "if"; 850 case OPCODE_IFC: return "ifc"; 851 case OPCODE_ELSE: return "else"; 852 case OPCODE_ENDIF: return "endif"; 853 case OPCODE_BREAK: return "break"; 854 case OPCODE_BREAKC: return "breakc"; 855 case OPCODE_MOVA: return "mova"; 856 case OPCODE_DEFB: return "defb"; 857 case OPCODE_DEFI: return "defi"; 858 case OPCODE_TEXCOORD: return "texcoord"; 859 case OPCODE_TEXKILL: return "texkill"; 860 case OPCODE_DISCARD: return "discard"; 861 case OPCODE_TEX: 862 if(version < 0x0104) return "tex"; 863 else return "texld"; 864 case OPCODE_TEXBEM: return "texbem"; 865 case OPCODE_TEXBEML: return "texbeml"; 866 case OPCODE_TEXREG2AR: return "texreg2ar"; 867 case OPCODE_TEXREG2GB: return "texreg2gb"; 868 case OPCODE_TEXM3X2PAD: return "texm3x2pad"; 869 case OPCODE_TEXM3X2TEX: return "texm3x2tex"; 870 case OPCODE_TEXM3X3PAD: return "texm3x3pad"; 871 case OPCODE_TEXM3X3TEX: return "texm3x3tex"; 872 case OPCODE_RESERVED0: return "reserved0"; 873 case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; 874 case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; 875 case OPCODE_EXPP: return "expp"; 876 case OPCODE_LOGP: return "logp"; 877 case OPCODE_CND: return "cnd"; 878 case OPCODE_DEF: return "def"; 879 case OPCODE_TEXREG2RGB: return "texreg2rgb"; 880 case OPCODE_TEXDP3TEX: return "texdp3tex"; 881 case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; 882 case OPCODE_TEXDP3: return "texdp3"; 883 case OPCODE_TEXM3X3: return "texm3x3"; 884 case OPCODE_TEXDEPTH: return "texdepth"; 885 case OPCODE_CMP0: return "cmp0"; 886 case OPCODE_ICMP: return "icmp"; 887 case OPCODE_UCMP: return "ucmp"; 888 case OPCODE_SELECT: return "select"; 889 case OPCODE_EXTRACT: return "extract"; 890 case OPCODE_INSERT: return "insert"; 891 case OPCODE_BEM: return "bem"; 892 case OPCODE_DP2ADD: return "dp2add"; 893 case OPCODE_DFDX: return "dFdx"; 894 case OPCODE_DFDY: return "dFdy"; 895 case OPCODE_FWIDTH: return "fwidth"; 896 case OPCODE_TEXLDD: return "texldd"; 897 case OPCODE_CMP: return "cmp"; 898 case OPCODE_TEXLDL: return "texldl"; 899 case OPCODE_TEXBIAS: return "texbias"; 900 case OPCODE_TEXOFFSET: return "texoffset"; 901 case OPCODE_TEXOFFSETBIAS: return "texoffsetbias"; 902 case OPCODE_TEXLODOFFSET: return "texlodoffset"; 903 case OPCODE_TEXELFETCH: return "texelfetch"; 904 case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset"; 905 case OPCODE_TEXGRAD: return "texgrad"; 906 case OPCODE_TEXGRADOFFSET: return "texgradoffset"; 907 case OPCODE_BREAKP: return "breakp"; 908 case OPCODE_TEXSIZE: return "texsize"; 909 case OPCODE_PHASE: return "phase"; 910 case OPCODE_COMMENT: return "comment"; 911 case OPCODE_END: return "end"; 912 case OPCODE_PS_1_0: return "ps_1_0"; 913 case OPCODE_PS_1_1: return "ps_1_1"; 914 case OPCODE_PS_1_2: return "ps_1_2"; 915 case OPCODE_PS_1_3: return "ps_1_3"; 916 case OPCODE_PS_1_4: return "ps_1_4"; 917 case OPCODE_PS_2_0: return "ps_2_0"; 918 case OPCODE_PS_2_x: return "ps_2_x"; 919 case OPCODE_PS_3_0: return "ps_3_0"; 920 case OPCODE_VS_1_0: return "vs_1_0"; 921 case OPCODE_VS_1_1: return "vs_1_1"; 922 case OPCODE_VS_2_0: return "vs_2_0"; 923 case OPCODE_VS_2_x: return "vs_2_x"; 924 case OPCODE_VS_2_sw: return "vs_2_sw"; 925 case OPCODE_VS_3_0: return "vs_3_0"; 926 case OPCODE_VS_3_sw: return "vs_3_sw"; 927 case OPCODE_WHILE: return "while"; 928 case OPCODE_ENDWHILE: return "endwhile"; 929 case OPCODE_COS: return "cos"; 930 case OPCODE_SIN: return "sin"; 931 case OPCODE_TAN: return "tan"; 932 case OPCODE_ACOS: return "acos"; 933 case OPCODE_ASIN: return "asin"; 934 case OPCODE_ATAN: return "atan"; 935 case OPCODE_ATAN2: return "atan2"; 936 case OPCODE_COSH: return "cosh"; 937 case OPCODE_SINH: return "sinh"; 938 case OPCODE_TANH: return "tanh"; 939 case OPCODE_ACOSH: return "acosh"; 940 case OPCODE_ASINH: return "asinh"; 941 case OPCODE_ATANH: return "atanh"; 942 case OPCODE_DP1: return "dp1"; 943 case OPCODE_DP2: return "dp2"; 944 case OPCODE_TRUNC: return "trunc"; 945 case OPCODE_FLOOR: return "floor"; 946 case OPCODE_ROUND: return "round"; 947 case OPCODE_ROUNDEVEN: return "roundEven"; 948 case OPCODE_CEIL: return "ceil"; 949 case OPCODE_EXP2: return "exp2"; 950 case OPCODE_LOG2: return "log2"; 951 case OPCODE_EXP: return "exp"; 952 case OPCODE_LOG: return "log"; 953 case OPCODE_POW: return "pow"; 954 case OPCODE_F2B: return "f2b"; 955 case OPCODE_B2F: return "b2f"; 956 case OPCODE_F2I: return "f2i"; 957 case OPCODE_I2F: return "i2f"; 958 case OPCODE_F2U: return "f2u"; 959 case OPCODE_U2F: return "u2f"; 960 case OPCODE_B2I: return "b2i"; 961 case OPCODE_I2B: return "i2b"; 962 case OPCODE_ALL: return "all"; 963 case OPCODE_ANY: return "any"; 964 case OPCODE_NEG: return "neg"; 965 case OPCODE_INEG: return "ineg"; 966 case OPCODE_ISNAN: return "isnan"; 967 case OPCODE_ISINF: return "isinf"; 968 case OPCODE_NOT: return "not"; 969 case OPCODE_OR: return "or"; 970 case OPCODE_XOR: return "xor"; 971 case OPCODE_AND: return "and"; 972 case OPCODE_EQ: return "eq"; 973 case OPCODE_NE: return "neq"; 974 case OPCODE_FORWARD1: return "forward1"; 975 case OPCODE_FORWARD2: return "forward2"; 976 case OPCODE_FORWARD3: return "forward3"; 977 case OPCODE_FORWARD4: return "forward4"; 978 case OPCODE_REFLECT1: return "reflect1"; 979 case OPCODE_REFLECT2: return "reflect2"; 980 case OPCODE_REFLECT3: return "reflect3"; 981 case OPCODE_REFLECT4: return "reflect4"; 982 case OPCODE_REFRACT1: return "refract1"; 983 case OPCODE_REFRACT2: return "refract2"; 984 case OPCODE_REFRACT3: return "refract3"; 985 case OPCODE_REFRACT4: return "refract4"; 986 case OPCODE_LEAVE: return "leave"; 987 case OPCODE_CONTINUE: return "continue"; 988 case OPCODE_TEST: return "test"; 989 case OPCODE_SWITCH: return "switch"; 990 case OPCODE_ENDSWITCH: return "endswitch"; 991 default: 992 ASSERT(false); 993 } 994 995 return "<unknown>"; 996 } 997 controlString() const998 std::string Shader::Instruction::controlString() const 999 { 1000 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP) 1001 { 1002 if(project) return "p"; 1003 1004 if(bias) return "b"; 1005 1006 // FIXME: LOD 1007 } 1008 1009 switch(control) 1010 { 1011 case 1: return "_gt"; 1012 case 2: return "_eq"; 1013 case 3: return "_ge"; 1014 case 4: return "_lt"; 1015 case 5: return "_ne"; 1016 case 6: return "_le"; 1017 default: 1018 return ""; 1019 // ASSERT(false); // FIXME 1020 } 1021 } 1022 string(ShaderType shaderType,unsigned short version) const1023 std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const 1024 { 1025 std::ostringstream buffer; 1026 1027 if(type == PARAMETER_FLOAT4LITERAL) 1028 { 1029 buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}'; 1030 1031 return buffer.str(); 1032 } 1033 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) 1034 { 1035 buffer << index; 1036 1037 return typeString(shaderType, version) + buffer.str(); 1038 } 1039 else 1040 { 1041 return typeString(shaderType, version); 1042 } 1043 } 1044 typeString(ShaderType shaderType,unsigned short version) const1045 std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const 1046 { 1047 switch(type) 1048 { 1049 case PARAMETER_TEMP: return "r"; 1050 case PARAMETER_INPUT: return "v"; 1051 case PARAMETER_CONST: return "c"; 1052 case PARAMETER_TEXTURE: 1053 // case PARAMETER_ADDR: 1054 if(shaderType == SHADER_PIXEL) return "t"; 1055 else return "a0"; 1056 case PARAMETER_RASTOUT: 1057 if(index == 0) return "oPos"; 1058 else if(index == 1) return "oFog"; 1059 else if(index == 2) return "oPts"; 1060 else ASSERT(false); 1061 case PARAMETER_ATTROUT: return "oD"; 1062 case PARAMETER_TEXCRDOUT: 1063 // case PARAMETER_OUTPUT: return ""; 1064 if(version < 0x0300) return "oT"; 1065 else return "o"; 1066 case PARAMETER_CONSTINT: return "i"; 1067 case PARAMETER_COLOROUT: return "oC"; 1068 case PARAMETER_DEPTHOUT: return "oDepth"; 1069 case PARAMETER_SAMPLER: return "s"; 1070 // case PARAMETER_CONST2: return ""; 1071 // case PARAMETER_CONST3: return ""; 1072 // case PARAMETER_CONST4: return ""; 1073 case PARAMETER_CONSTBOOL: return "b"; 1074 case PARAMETER_LOOP: return "aL"; 1075 // case PARAMETER_TEMPFLOAT16: return ""; 1076 case PARAMETER_MISCTYPE: 1077 switch(index) 1078 { 1079 case VPosIndex: return "vPos"; 1080 case VFaceIndex: return "vFace"; 1081 case InstanceIDIndex: return "iID"; 1082 case VertexIDIndex: return "vID"; 1083 default: ASSERT(false); 1084 } 1085 case PARAMETER_LABEL: return "l"; 1086 case PARAMETER_PREDICATE: return "p0"; 1087 case PARAMETER_FLOAT4LITERAL: return ""; 1088 case PARAMETER_BOOL1LITERAL: return ""; 1089 case PARAMETER_INT4LITERAL: return ""; 1090 // case PARAMETER_VOID: return ""; 1091 default: 1092 ASSERT(false); 1093 } 1094 1095 return ""; 1096 } 1097 isBranch() const1098 bool Shader::Instruction::isBranch() const 1099 { 1100 return opcode == OPCODE_IF || opcode == OPCODE_IFC; 1101 } 1102 isCall() const1103 bool Shader::Instruction::isCall() const 1104 { 1105 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ; 1106 } 1107 isBreak() const1108 bool Shader::Instruction::isBreak() const 1109 { 1110 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP; 1111 } 1112 isLoop() const1113 bool Shader::Instruction::isLoop() const 1114 { 1115 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE; 1116 } 1117 isEndLoop() const1118 bool Shader::Instruction::isEndLoop() const 1119 { 1120 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE; 1121 } 1122 isPredicated() const1123 bool Shader::Instruction::isPredicated() const 1124 { 1125 return predicate || 1126 analysisBranch || 1127 analysisBreak || 1128 analysisContinue || 1129 analysisLeave; 1130 } 1131 Shader()1132 Shader::Shader() : serialID(serialCounter++) 1133 { 1134 usedSamplers = 0; 1135 } 1136 ~Shader()1137 Shader::~Shader() 1138 { 1139 for(auto &inst : instruction) 1140 { 1141 delete inst; 1142 inst = 0; 1143 } 1144 } 1145 parse(const unsigned long * token)1146 void Shader::parse(const unsigned long *token) 1147 { 1148 minorVersion = (unsigned char)(token[0] & 0x000000FF); 1149 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); 1150 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); 1151 1152 int length = 0; 1153 1154 if(shaderType == SHADER_VERTEX) 1155 { 1156 length = VertexShader::validate(token); 1157 } 1158 else if(shaderType == SHADER_PIXEL) 1159 { 1160 length = PixelShader::validate(token); 1161 } 1162 else ASSERT(false); 1163 1164 ASSERT(length != 0); 1165 instruction.resize(length); 1166 1167 for(int i = 0; i < length; i++) 1168 { 1169 while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token 1170 { 1171 int length = (*token & 0x7FFF0000) >> 16; 1172 1173 token += length + 1; 1174 } 1175 1176 int tokenCount = size(*token); 1177 1178 instruction[i] = new Instruction(token, tokenCount, majorVersion); 1179 1180 token += 1 + tokenCount; 1181 } 1182 } 1183 size(unsigned long opcode) const1184 int Shader::size(unsigned long opcode) const 1185 { 1186 return size(opcode, shaderModel); 1187 } 1188 size(unsigned long opcode,unsigned short shaderModel)1189 int Shader::size(unsigned long opcode, unsigned short shaderModel) 1190 { 1191 if(shaderModel > 0x0300) 1192 { 1193 ASSERT(false); 1194 } 1195 1196 static const signed char size[] = 1197 { 1198 0, // NOP = 0 1199 2, // MOV 1200 3, // ADD 1201 3, // SUB 1202 4, // MAD 1203 3, // MUL 1204 2, // RCP 1205 2, // RSQ 1206 3, // DP3 1207 3, // DP4 1208 3, // MIN 1209 3, // MAX 1210 3, // SLT 1211 3, // SGE 1212 2, // EXP 1213 2, // LOG 1214 2, // LIT 1215 3, // DST 1216 4, // LRP 1217 2, // FRC 1218 3, // M4x4 1219 3, // M4x3 1220 3, // M3x4 1221 3, // M3x3 1222 3, // M3x2 1223 1, // CALL 1224 2, // CALLNZ 1225 2, // LOOP 1226 0, // RET 1227 0, // ENDLOOP 1228 1, // LABEL 1229 2, // DCL 1230 3, // POW 1231 3, // CRS 1232 4, // SGN 1233 2, // ABS 1234 2, // NRM 1235 4, // SINCOS 1236 1, // REP 1237 0, // ENDREP 1238 1, // IF 1239 2, // IFC 1240 0, // ELSE 1241 0, // ENDIF 1242 0, // BREAK 1243 2, // BREAKC 1244 2, // MOVA 1245 2, // DEFB 1246 5, // DEFI 1247 -1, // 49 1248 -1, // 50 1249 -1, // 51 1250 -1, // 52 1251 -1, // 53 1252 -1, // 54 1253 -1, // 55 1254 -1, // 56 1255 -1, // 57 1256 -1, // 58 1257 -1, // 59 1258 -1, // 60 1259 -1, // 61 1260 -1, // 62 1261 -1, // 63 1262 1, // TEXCOORD = 64 1263 1, // TEXKILL 1264 1, // TEX 1265 2, // TEXBEM 1266 2, // TEXBEML 1267 2, // TEXREG2AR 1268 2, // TEXREG2GB 1269 2, // TEXM3x2PAD 1270 2, // TEXM3x2TEX 1271 2, // TEXM3x3PAD 1272 2, // TEXM3x3TEX 1273 -1, // RESERVED0 1274 3, // TEXM3x3SPEC 1275 2, // TEXM3x3VSPEC 1276 2, // EXPP 1277 2, // LOGP 1278 4, // CND 1279 5, // DEF 1280 2, // TEXREG2RGB 1281 2, // TEXDP3TEX 1282 2, // TEXM3x2DEPTH 1283 2, // TEXDP3 1284 2, // TEXM3x3 1285 1, // TEXDEPTH 1286 4, // CMP 1287 3, // BEM 1288 4, // DP2ADD 1289 2, // DSX 1290 2, // DSY 1291 5, // TEXLDD 1292 3, // SETP 1293 3, // TEXLDL 1294 2, // BREAKP 1295 -1, // 97 1296 -1, // 98 1297 -1, // 99 1298 -1, // 100 1299 -1, // 101 1300 -1, // 102 1301 -1, // 103 1302 -1, // 104 1303 -1, // 105 1304 -1, // 106 1305 -1, // 107 1306 -1, // 108 1307 -1, // 109 1308 -1, // 110 1309 -1, // 111 1310 -1, // 112 1311 }; 1312 1313 int length = 0; 1314 1315 if((opcode & 0x0000FFFF) == OPCODE_COMMENT) 1316 { 1317 return (opcode & 0x7FFF0000) >> 16; 1318 } 1319 1320 if(opcode != OPCODE_PS_1_0 && 1321 opcode != OPCODE_PS_1_1 && 1322 opcode != OPCODE_PS_1_2 && 1323 opcode != OPCODE_PS_1_3 && 1324 opcode != OPCODE_PS_1_4 && 1325 opcode != OPCODE_PS_2_0 && 1326 opcode != OPCODE_PS_2_x && 1327 opcode != OPCODE_PS_3_0 && 1328 opcode != OPCODE_VS_1_0 && 1329 opcode != OPCODE_VS_1_1 && 1330 opcode != OPCODE_VS_2_0 && 1331 opcode != OPCODE_VS_2_x && 1332 opcode != OPCODE_VS_2_sw && 1333 opcode != OPCODE_VS_3_0 && 1334 opcode != OPCODE_VS_3_sw && 1335 opcode != OPCODE_PHASE && 1336 opcode != OPCODE_END) 1337 { 1338 if(shaderModel >= 0x0200) 1339 { 1340 length = (opcode & 0x0F000000) >> 24; 1341 } 1342 else 1343 { 1344 length = size[opcode & 0x0000FFFF]; 1345 } 1346 } 1347 1348 if(length < 0) 1349 { 1350 ASSERT(false); 1351 } 1352 1353 if(shaderModel == 0x0104) 1354 { 1355 switch(opcode & 0x0000FFFF) 1356 { 1357 case OPCODE_TEX: 1358 length += 1; 1359 break; 1360 case OPCODE_TEXCOORD: 1361 length += 1; 1362 break; 1363 default: 1364 break; 1365 } 1366 } 1367 1368 return length; 1369 } 1370 maskContainsComponent(int mask,int component)1371 bool Shader::maskContainsComponent(int mask, int component) 1372 { 1373 return (mask & (1 << component)) != 0; 1374 } 1375 swizzleContainsComponent(int swizzle,int component)1376 bool Shader::swizzleContainsComponent(int swizzle, int component) 1377 { 1378 if((swizzle & 0x03) >> 0 == component) return true; 1379 if((swizzle & 0x0C) >> 2 == component) return true; 1380 if((swizzle & 0x30) >> 4 == component) return true; 1381 if((swizzle & 0xC0) >> 6 == component) return true; 1382 1383 return false; 1384 } 1385 swizzleContainsComponentMasked(int swizzle,int component,int mask)1386 bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask) 1387 { 1388 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true; 1389 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true; 1390 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true; 1391 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true; 1392 1393 return false; 1394 } 1395 containsDynamicBranching() const1396 bool Shader::containsDynamicBranching() const 1397 { 1398 return dynamicBranching; 1399 } 1400 containsBreakInstruction() const1401 bool Shader::containsBreakInstruction() const 1402 { 1403 return containsBreak; 1404 } 1405 containsContinueInstruction() const1406 bool Shader::containsContinueInstruction() const 1407 { 1408 return containsContinue; 1409 } 1410 containsLeaveInstruction() const1411 bool Shader::containsLeaveInstruction() const 1412 { 1413 return containsLeave; 1414 } 1415 containsDefineInstruction() const1416 bool Shader::containsDefineInstruction() const 1417 { 1418 return containsDefine; 1419 } 1420 usesSampler(int index) const1421 bool Shader::usesSampler(int index) const 1422 { 1423 return (usedSamplers & (1 << index)) != 0; 1424 } 1425 getSerialID() const1426 int Shader::getSerialID() const 1427 { 1428 return serialID; 1429 } 1430 getLength() const1431 size_t Shader::getLength() const 1432 { 1433 return instruction.size(); 1434 } 1435 getShaderType() const1436 Shader::ShaderType Shader::getShaderType() const 1437 { 1438 return shaderType; 1439 } 1440 getShaderModel() const1441 unsigned short Shader::getShaderModel() const 1442 { 1443 return shaderModel; 1444 } 1445 print(const char * fileName,...) const1446 void Shader::print(const char *fileName, ...) const 1447 { 1448 char fullName[1024 + 1]; 1449 1450 va_list vararg; 1451 va_start(vararg, fileName); 1452 vsnprintf(fullName, 1024, fileName, vararg); 1453 va_end(vararg); 1454 1455 std::ofstream file(fullName, std::ofstream::out); 1456 1457 for(const auto &inst : instruction) 1458 { 1459 file << inst->string(shaderType, shaderModel) << std::endl; 1460 } 1461 } 1462 printInstruction(int index,const char * fileName) const1463 void Shader::printInstruction(int index, const char *fileName) const 1464 { 1465 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app); 1466 1467 file << instruction[index]->string(shaderType, shaderModel) << std::endl; 1468 } 1469 append(Instruction * instruction)1470 void Shader::append(Instruction *instruction) 1471 { 1472 this->instruction.push_back(instruction); 1473 } 1474 declareSampler(int i)1475 void Shader::declareSampler(int i) 1476 { 1477 if(i >= 0 && i < 16) 1478 { 1479 usedSamplers |= 1 << i; 1480 } 1481 } 1482 getInstruction(size_t i) const1483 const Shader::Instruction *Shader::getInstruction(size_t i) const 1484 { 1485 ASSERT(i < instruction.size()); 1486 1487 return instruction[i]; 1488 } 1489 optimize()1490 void Shader::optimize() 1491 { 1492 optimizeLeave(); 1493 optimizeCall(); 1494 removeNull(); 1495 } 1496 optimizeLeave()1497 void Shader::optimizeLeave() 1498 { 1499 // A return (leave) right before the end of a function or the shader can be removed 1500 for(unsigned int i = 0; i < instruction.size(); i++) 1501 { 1502 if(instruction[i]->opcode == OPCODE_LEAVE) 1503 { 1504 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET) 1505 { 1506 instruction[i]->opcode = OPCODE_NULL; 1507 } 1508 } 1509 } 1510 } 1511 optimizeCall()1512 void Shader::optimizeCall() 1513 { 1514 // Eliminate uncalled functions 1515 std::set<int> calledFunctions; 1516 bool rescan = true; 1517 1518 while(rescan) 1519 { 1520 calledFunctions.clear(); 1521 rescan = false; 1522 1523 for(const auto &inst : instruction) 1524 { 1525 if(inst->isCall()) 1526 { 1527 calledFunctions.insert(inst->dst.label); 1528 } 1529 } 1530 1531 if(!calledFunctions.empty()) 1532 { 1533 for(unsigned int i = 0; i < instruction.size(); i++) 1534 { 1535 if(instruction[i]->opcode == OPCODE_LABEL) 1536 { 1537 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end()) 1538 { 1539 for( ; i < instruction.size(); i++) 1540 { 1541 Opcode oldOpcode = instruction[i]->opcode; 1542 instruction[i]->opcode = OPCODE_NULL; 1543 1544 if(oldOpcode == OPCODE_RET) 1545 { 1546 rescan = true; 1547 break; 1548 } 1549 } 1550 } 1551 } 1552 } 1553 } 1554 } 1555 1556 // Optimize the entry call 1557 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET) 1558 { 1559 if(calledFunctions.size() == 1) 1560 { 1561 instruction[0]->opcode = OPCODE_NULL; 1562 instruction[1]->opcode = OPCODE_NULL; 1563 1564 for(size_t i = 2; i < instruction.size(); i++) 1565 { 1566 if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET) 1567 { 1568 instruction[i]->opcode = OPCODE_NULL; 1569 } 1570 } 1571 } 1572 } 1573 } 1574 removeNull()1575 void Shader::removeNull() 1576 { 1577 size_t size = 0; 1578 for(size_t i = 0; i < instruction.size(); i++) 1579 { 1580 if(instruction[i]->opcode != OPCODE_NULL) 1581 { 1582 instruction[size] = instruction[i]; 1583 size++; 1584 } 1585 else 1586 { 1587 delete instruction[i]; 1588 } 1589 } 1590 1591 instruction.resize(size); 1592 } 1593 analyzeDirtyConstants()1594 void Shader::analyzeDirtyConstants() 1595 { 1596 dirtyConstantsF = 0; 1597 dirtyConstantsI = 0; 1598 dirtyConstantsB = 0; 1599 1600 for(const auto &inst : instruction) 1601 { 1602 switch(inst->opcode) 1603 { 1604 case OPCODE_DEF: 1605 if(inst->dst.index + 1 > dirtyConstantsF) 1606 { 1607 dirtyConstantsF = inst->dst.index + 1; 1608 } 1609 break; 1610 case OPCODE_DEFI: 1611 if(inst->dst.index + 1 > dirtyConstantsI) 1612 { 1613 dirtyConstantsI = inst->dst.index + 1; 1614 } 1615 break; 1616 case OPCODE_DEFB: 1617 if(inst->dst.index + 1 > dirtyConstantsB) 1618 { 1619 dirtyConstantsB = inst->dst.index + 1; 1620 } 1621 break; 1622 default: 1623 break; 1624 } 1625 } 1626 } 1627 analyzeDynamicBranching()1628 void Shader::analyzeDynamicBranching() 1629 { 1630 dynamicBranching = false; 1631 containsLeave = false; 1632 containsBreak = false; 1633 containsContinue = false; 1634 containsDefine = false; 1635 1636 // Determine global presence of branching instructions 1637 for(const auto &inst : instruction) 1638 { 1639 switch(inst->opcode) 1640 { 1641 case OPCODE_CALLNZ: 1642 case OPCODE_IF: 1643 case OPCODE_IFC: 1644 case OPCODE_BREAK: 1645 case OPCODE_BREAKC: 1646 case OPCODE_CMP: 1647 case OPCODE_BREAKP: 1648 case OPCODE_LEAVE: 1649 case OPCODE_CONTINUE: 1650 if(inst->src[0].type != PARAMETER_CONSTBOOL) 1651 { 1652 dynamicBranching = true; 1653 } 1654 1655 if(inst->opcode == OPCODE_LEAVE) 1656 { 1657 containsLeave = true; 1658 } 1659 1660 if(inst->isBreak()) 1661 { 1662 containsBreak = true; 1663 } 1664 1665 if(inst->opcode == OPCODE_CONTINUE) 1666 { 1667 containsContinue = true; 1668 } 1669 case OPCODE_DEF: 1670 case OPCODE_DEFB: 1671 case OPCODE_DEFI: 1672 containsDefine = true; 1673 default: 1674 break; 1675 } 1676 } 1677 1678 // Conservatively determine which instructions are affected by dynamic branching 1679 int branchDepth = 0; 1680 int breakDepth = 0; 1681 int continueDepth = 0; 1682 bool leaveReturn = false; 1683 unsigned int functionBegin = 0; 1684 1685 for(unsigned int i = 0; i < instruction.size(); i++) 1686 { 1687 // If statements and loops 1688 if(instruction[i]->isBranch() || instruction[i]->isLoop()) 1689 { 1690 branchDepth++; 1691 } 1692 else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop()) 1693 { 1694 branchDepth--; 1695 } 1696 1697 if(branchDepth > 0) 1698 { 1699 instruction[i]->analysisBranch = true; 1700 1701 if(instruction[i]->isCall()) 1702 { 1703 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1704 } 1705 } 1706 1707 // Break statemement 1708 if(instruction[i]->isBreak()) 1709 { 1710 breakDepth++; 1711 } 1712 1713 if(breakDepth > 0) 1714 { 1715 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask 1716 { 1717 breakDepth++; 1718 } 1719 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) 1720 { 1721 breakDepth--; 1722 } 1723 1724 instruction[i]->analysisBreak = true; 1725 1726 if(instruction[i]->isCall()) 1727 { 1728 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1729 } 1730 } 1731 1732 // Continue statement 1733 if(instruction[i]->opcode == OPCODE_CONTINUE) 1734 { 1735 continueDepth++; 1736 } 1737 1738 if(continueDepth > 0) 1739 { 1740 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask 1741 { 1742 continueDepth++; 1743 } 1744 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) 1745 { 1746 continueDepth--; 1747 } 1748 1749 instruction[i]->analysisContinue = true; 1750 1751 if(instruction[i]->isCall()) 1752 { 1753 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE); 1754 } 1755 } 1756 1757 // Return (leave) statement 1758 if(instruction[i]->opcode == OPCODE_LEAVE) 1759 { 1760 leaveReturn = true; 1761 1762 // Mark loop body instructions prior to the return statement 1763 for(unsigned int l = functionBegin; l < i; l++) 1764 { 1765 if(instruction[l]->isLoop()) 1766 { 1767 for(unsigned int r = l + 1; r < i; r++) 1768 { 1769 instruction[r]->analysisLeave = true; 1770 } 1771 1772 break; 1773 } 1774 } 1775 } 1776 else if(instruction[i]->opcode == OPCODE_RET) // End of the function 1777 { 1778 leaveReturn = false; 1779 } 1780 else if(instruction[i]->opcode == OPCODE_LABEL) 1781 { 1782 functionBegin = i; 1783 } 1784 1785 if(leaveReturn) 1786 { 1787 instruction[i]->analysisLeave = true; 1788 1789 if(instruction[i]->isCall()) 1790 { 1791 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE); 1792 } 1793 } 1794 } 1795 } 1796 markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1797 void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag) 1798 { 1799 bool marker = false; 1800 for(auto &inst : instruction) 1801 { 1802 if(!marker) 1803 { 1804 if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel) 1805 { 1806 marker = true; 1807 } 1808 } 1809 else 1810 { 1811 if(inst->opcode == OPCODE_RET) 1812 { 1813 break; 1814 } 1815 else if(inst->isCall()) 1816 { 1817 markFunctionAnalysis(inst->dst.label, flag); 1818 } 1819 1820 inst->analysis |= flag; 1821 } 1822 } 1823 } 1824 analyzeSamplers()1825 void Shader::analyzeSamplers() 1826 { 1827 for(const auto &inst : instruction) 1828 { 1829 switch(inst->opcode) 1830 { 1831 case OPCODE_TEX: 1832 case OPCODE_TEXBEM: 1833 case OPCODE_TEXBEML: 1834 case OPCODE_TEXREG2AR: 1835 case OPCODE_TEXREG2GB: 1836 case OPCODE_TEXM3X2TEX: 1837 case OPCODE_TEXM3X3TEX: 1838 case OPCODE_TEXM3X3SPEC: 1839 case OPCODE_TEXM3X3VSPEC: 1840 case OPCODE_TEXREG2RGB: 1841 case OPCODE_TEXDP3TEX: 1842 case OPCODE_TEXM3X2DEPTH: 1843 case OPCODE_TEXLDD: 1844 case OPCODE_TEXLDL: 1845 case OPCODE_TEXLOD: 1846 case OPCODE_TEXOFFSET: 1847 case OPCODE_TEXOFFSETBIAS: 1848 case OPCODE_TEXLODOFFSET: 1849 case OPCODE_TEXELFETCH: 1850 case OPCODE_TEXELFETCHOFFSET: 1851 case OPCODE_TEXGRAD: 1852 case OPCODE_TEXGRADOFFSET: 1853 { 1854 Parameter &dst = inst->dst; 1855 Parameter &src1 = inst->src[1]; 1856 1857 if(majorVersion >= 2) 1858 { 1859 usedSamplers |= 1 << src1.index; 1860 } 1861 else 1862 { 1863 usedSamplers |= 1 << dst.index; 1864 } 1865 } 1866 break; 1867 default: 1868 break; 1869 } 1870 } 1871 } 1872 1873 // Assigns a unique index to each call instruction, on a per label basis. 1874 // This is used to know what basic block to return to. analyzeCallSites()1875 void Shader::analyzeCallSites() 1876 { 1877 int callSiteIndex[2048] = {0}; 1878 1879 for(auto &inst : instruction) 1880 { 1881 if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ) 1882 { 1883 int label = inst->dst.label; 1884 1885 inst->dst.callSite = callSiteIndex[label]++; 1886 } 1887 } 1888 } 1889 analyzeDynamicIndexing()1890 void Shader::analyzeDynamicIndexing() 1891 { 1892 dynamicallyIndexedTemporaries = false; 1893 dynamicallyIndexedInput = false; 1894 dynamicallyIndexedOutput = false; 1895 1896 for(const auto &inst : instruction) 1897 { 1898 if(inst->dst.rel.type == PARAMETER_ADDR || 1899 inst->dst.rel.type == PARAMETER_LOOP || 1900 inst->dst.rel.type == PARAMETER_TEMP || 1901 inst->dst.rel.type == PARAMETER_CONST) 1902 { 1903 switch(inst->dst.type) 1904 { 1905 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; 1906 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; 1907 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; 1908 default: break; 1909 } 1910 } 1911 1912 for(int j = 0; j < 3; j++) 1913 { 1914 if(inst->src[j].rel.type == PARAMETER_ADDR || 1915 inst->src[j].rel.type == PARAMETER_LOOP || 1916 inst->src[j].rel.type == PARAMETER_TEMP || 1917 inst->src[j].rel.type == PARAMETER_CONST) 1918 { 1919 switch(inst->src[j].type) 1920 { 1921 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; 1922 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; 1923 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; 1924 default: break; 1925 } 1926 } 1927 } 1928 } 1929 } 1930 } 1931