1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Shader.hpp" 16 17 #include "VertexShader.hpp" 18 #include "PixelShader.hpp" 19 #include "Common/Math.hpp" 20 #include "Common/Debug.hpp" 21 22 #include <algorithm> 23 #include <set> 24 #include <fstream> 25 #include <functional> 26 #include <sstream> 27 #include <stdarg.h> 28 #include <unordered_map> 29 #include <unordered_set> 30 31 namespace sw 32 { 33 volatile int Shader::serialCounter = 1; 34 OPCODE_DP(int i)35 Shader::Opcode Shader::OPCODE_DP(int i) 36 { 37 switch(i) 38 { 39 default: ASSERT(false); 40 case 1: return OPCODE_DP1; 41 case 2: return OPCODE_DP2; 42 case 3: return OPCODE_DP3; 43 case 4: return OPCODE_DP4; 44 } 45 } 46 OPCODE_LEN(int i)47 Shader::Opcode Shader::OPCODE_LEN(int i) 48 { 49 switch(i) 50 { 51 default: ASSERT(false); 52 case 1: return OPCODE_ABS; 53 case 2: return OPCODE_LEN2; 54 case 3: return OPCODE_LEN3; 55 case 4: return OPCODE_LEN4; 56 } 57 } 58 OPCODE_DIST(int i)59 Shader::Opcode Shader::OPCODE_DIST(int i) 60 { 61 switch(i) 62 { 63 default: ASSERT(false); 64 case 1: return OPCODE_DIST1; 65 case 2: return OPCODE_DIST2; 66 case 3: return OPCODE_DIST3; 67 case 4: return OPCODE_DIST4; 68 } 69 } 70 OPCODE_NRM(int i)71 Shader::Opcode Shader::OPCODE_NRM(int i) 72 { 73 switch(i) 74 { 75 default: ASSERT(false); 76 case 1: return OPCODE_SGN; 77 case 2: return OPCODE_NRM2; 78 case 3: return OPCODE_NRM3; 79 case 4: return OPCODE_NRM4; 80 } 81 } 82 OPCODE_FORWARD(int i)83 Shader::Opcode Shader::OPCODE_FORWARD(int i) 84 { 85 switch(i) 86 { 87 default: ASSERT(false); 88 case 1: return OPCODE_FORWARD1; 89 case 2: return OPCODE_FORWARD2; 90 case 3: return OPCODE_FORWARD3; 91 case 4: return OPCODE_FORWARD4; 92 } 93 } 94 OPCODE_REFLECT(int i)95 Shader::Opcode Shader::OPCODE_REFLECT(int i) 96 { 97 switch(i) 98 { 99 default: ASSERT(false); 100 case 1: return OPCODE_REFLECT1; 101 case 2: return OPCODE_REFLECT2; 102 case 3: return OPCODE_REFLECT3; 103 case 4: return OPCODE_REFLECT4; 104 } 105 } 106 OPCODE_REFRACT(int i)107 Shader::Opcode Shader::OPCODE_REFRACT(int i) 108 { 109 switch(i) 110 { 111 default: ASSERT(false); 112 case 1: return OPCODE_REFRACT1; 113 case 2: return OPCODE_REFRACT2; 114 case 3: return OPCODE_REFRACT3; 115 case 4: return OPCODE_REFRACT4; 116 } 117 } 118 Instruction(Opcode opcode)119 Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0) 120 { 121 control = CONTROL_RESERVED0; 122 123 predicate = false; 124 predicateNot = false; 125 predicateSwizzle = 0xE4; 126 127 coissue = false; 128 samplerType = SAMPLER_UNKNOWN; 129 usage = USAGE_POSITION; 130 usageIndex = 0; 131 } 132 Instruction(const unsigned long * token,int size,unsigned char majorVersion)133 Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0) 134 { 135 parseOperationToken(*token++, majorVersion); 136 137 samplerType = SAMPLER_UNKNOWN; 138 usage = USAGE_POSITION; 139 usageIndex = 0; 140 141 if(opcode == OPCODE_IF || 142 opcode == OPCODE_IFC || 143 opcode == OPCODE_LOOP || 144 opcode == OPCODE_REP || 145 opcode == OPCODE_BREAKC || 146 opcode == OPCODE_BREAKP) // No destination operand 147 { 148 if(size > 0) parseSourceToken(0, token++, majorVersion); 149 if(size > 1) parseSourceToken(1, token++, majorVersion); 150 if(size > 2) parseSourceToken(2, token++, majorVersion); 151 if(size > 3) ASSERT(false); 152 } 153 else if(opcode == OPCODE_DCL) 154 { 155 parseDeclarationToken(*token++); 156 parseDestinationToken(token++, majorVersion); 157 } 158 else 159 { 160 if(size > 0) 161 { 162 parseDestinationToken(token, majorVersion); 163 164 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3) 165 { 166 token++; 167 size--; 168 } 169 170 token++; 171 size--; 172 } 173 174 if(predicate) 175 { 176 ASSERT(size != 0); 177 178 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT; 179 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); 180 181 token++; 182 size--; 183 } 184 185 for(int i = 0; size > 0; i++) 186 { 187 parseSourceToken(i, token, majorVersion); 188 189 token++; 190 size--; 191 192 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2) 193 { 194 token++; 195 size--; 196 } 197 } 198 } 199 } 200 ~Instruction()201 Shader::Instruction::~Instruction() 202 { 203 } 204 string(ShaderType shaderType,unsigned short version) const205 std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const 206 { 207 std::string instructionString; 208 209 if(opcode != OPCODE_DCL) 210 { 211 instructionString += coissue ? "+ " : ""; 212 213 if(predicate) 214 { 215 instructionString += predicateNot ? "(!p0" : "(p0"; 216 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle); 217 instructionString += ") "; 218 } 219 220 instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString(); 221 222 if(dst.type != PARAMETER_VOID) 223 { 224 instructionString += " " + dst.string(shaderType, version) + 225 dst.relativeString() + 226 dst.maskString(); 227 } 228 229 for(int i = 0; i < 4; i++) 230 { 231 if(src[i].type != PARAMETER_VOID) 232 { 233 instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " "; 234 instructionString += src[i].preModifierString() + 235 src[i].string(shaderType, version) + 236 src[i].relativeString() + 237 src[i].postModifierString() + 238 src[i].swizzleString(); 239 } 240 } 241 } 242 else // DCL 243 { 244 instructionString += "dcl"; 245 246 if(dst.type == PARAMETER_SAMPLER) 247 { 248 switch(samplerType) 249 { 250 case SAMPLER_UNKNOWN: instructionString += " "; break; 251 case SAMPLER_1D: instructionString += "_1d "; break; 252 case SAMPLER_2D: instructionString += "_2d "; break; 253 case SAMPLER_CUBE: instructionString += "_cube "; break; 254 case SAMPLER_VOLUME: instructionString += "_volume "; break; 255 default: 256 ASSERT(false); 257 } 258 259 instructionString += dst.string(shaderType, version); 260 } 261 else if(dst.type == PARAMETER_INPUT || 262 dst.type == PARAMETER_OUTPUT || 263 dst.type == PARAMETER_TEXTURE) 264 { 265 if(version >= 0x0300) 266 { 267 switch(usage) 268 { 269 case USAGE_POSITION: instructionString += "_position"; break; 270 case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break; 271 case USAGE_BLENDINDICES: instructionString += "_blendindices"; break; 272 case USAGE_NORMAL: instructionString += "_normal"; break; 273 case USAGE_PSIZE: instructionString += "_psize"; break; 274 case USAGE_TEXCOORD: instructionString += "_texcoord"; break; 275 case USAGE_TANGENT: instructionString += "_tangent"; break; 276 case USAGE_BINORMAL: instructionString += "_binormal"; break; 277 case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break; 278 case USAGE_POSITIONT: instructionString += "_positiont"; break; 279 case USAGE_COLOR: instructionString += "_color"; break; 280 case USAGE_FOG: instructionString += "_fog"; break; 281 case USAGE_DEPTH: instructionString += "_depth"; break; 282 case USAGE_SAMPLE: instructionString += "_sample"; break; 283 default: 284 ASSERT(false); 285 } 286 287 if(usageIndex > 0) 288 { 289 std::ostringstream buffer; 290 291 buffer << (int)usageIndex; 292 293 instructionString += buffer.str(); 294 } 295 } 296 else ASSERT(dst.type != PARAMETER_OUTPUT); 297 298 instructionString += " "; 299 300 instructionString += dst.string(shaderType, version); 301 instructionString += dst.maskString(); 302 } 303 else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace 304 { 305 instructionString += " "; 306 307 instructionString += dst.string(shaderType, version); 308 } 309 else ASSERT(false); 310 } 311 312 return instructionString; 313 } 314 modifierString() const315 std::string Shader::DestinationParameter::modifierString() const 316 { 317 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 318 { 319 return ""; 320 } 321 322 std::string modifierString; 323 324 if(saturate) 325 { 326 modifierString += "_sat"; 327 } 328 329 if(partialPrecision) 330 { 331 modifierString += "_pp"; 332 } 333 334 if(centroid) 335 { 336 modifierString += "_centroid"; 337 } 338 339 return modifierString; 340 } 341 shiftString() const342 std::string Shader::DestinationParameter::shiftString() const 343 { 344 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 345 { 346 return ""; 347 } 348 349 switch(shift) 350 { 351 case 0: return ""; 352 case 1: return "_x2"; 353 case 2: return "_x4"; 354 case 3: return "_x8"; 355 case -1: return "_d2"; 356 case -2: return "_d4"; 357 case -3: return "_d8"; 358 default: 359 return ""; 360 // ASSERT(false); // FIXME 361 } 362 } 363 maskString() const364 std::string Shader::DestinationParameter::maskString() const 365 { 366 if(type == PARAMETER_VOID || type == PARAMETER_LABEL) 367 { 368 return ""; 369 } 370 371 switch(mask) 372 { 373 case 0x0: return ""; 374 case 0x1: return ".x"; 375 case 0x2: return ".y"; 376 case 0x3: return ".xy"; 377 case 0x4: return ".z"; 378 case 0x5: return ".xz"; 379 case 0x6: return ".yz"; 380 case 0x7: return ".xyz"; 381 case 0x8: return ".w"; 382 case 0x9: return ".xw"; 383 case 0xA: return ".yw"; 384 case 0xB: return ".xyw"; 385 case 0xC: return ".zw"; 386 case 0xD: return ".xzw"; 387 case 0xE: return ".yzw"; 388 case 0xF: return ""; 389 default: 390 ASSERT(false); 391 } 392 393 return ""; 394 } 395 preModifierString() const396 std::string Shader::SourceParameter::preModifierString() const 397 { 398 if(type == PARAMETER_VOID) 399 { 400 return ""; 401 } 402 403 switch(modifier) 404 { 405 case MODIFIER_NONE: return ""; 406 case MODIFIER_NEGATE: return "-"; 407 case MODIFIER_BIAS: return ""; 408 case MODIFIER_BIAS_NEGATE: return "-"; 409 case MODIFIER_SIGN: return ""; 410 case MODIFIER_SIGN_NEGATE: return "-"; 411 case MODIFIER_COMPLEMENT: return "1-"; 412 case MODIFIER_X2: return ""; 413 case MODIFIER_X2_NEGATE: return "-"; 414 case MODIFIER_DZ: return ""; 415 case MODIFIER_DW: return ""; 416 case MODIFIER_ABS: return ""; 417 case MODIFIER_ABS_NEGATE: return "-"; 418 case MODIFIER_NOT: return "!"; 419 default: 420 ASSERT(false); 421 } 422 423 return ""; 424 } 425 relativeString() const426 std::string Shader::Parameter::relativeString() const 427 { 428 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP) 429 { 430 if(rel.type == PARAMETER_VOID) 431 { 432 return ""; 433 } 434 else if(rel.type == PARAMETER_ADDR) 435 { 436 switch(rel.swizzle & 0x03) 437 { 438 case 0: return "[a0.x]"; 439 case 1: return "[a0.y]"; 440 case 2: return "[a0.z]"; 441 case 3: return "[a0.w]"; 442 } 443 } 444 else if(rel.type == PARAMETER_TEMP) 445 { 446 std::ostringstream buffer; 447 buffer << rel.index; 448 449 switch(rel.swizzle & 0x03) 450 { 451 case 0: return "[r" + buffer.str() + ".x]"; 452 case 1: return "[r" + buffer.str() + ".y]"; 453 case 2: return "[r" + buffer.str() + ".z]"; 454 case 3: return "[r" + buffer.str() + ".w]"; 455 } 456 } 457 else if(rel.type == PARAMETER_LOOP) 458 { 459 return "[aL]"; 460 } 461 else if(rel.type == PARAMETER_CONST) 462 { 463 std::ostringstream buffer; 464 buffer << rel.index; 465 466 switch(rel.swizzle & 0x03) 467 { 468 case 0: return "[c" + buffer.str() + ".x]"; 469 case 1: return "[c" + buffer.str() + ".y]"; 470 case 2: return "[c" + buffer.str() + ".z]"; 471 case 3: return "[c" + buffer.str() + ".w]"; 472 } 473 } 474 else ASSERT(false); 475 } 476 477 return ""; 478 } 479 postModifierString() const480 std::string Shader::SourceParameter::postModifierString() const 481 { 482 if(type == PARAMETER_VOID) 483 { 484 return ""; 485 } 486 487 switch(modifier) 488 { 489 case MODIFIER_NONE: return ""; 490 case MODIFIER_NEGATE: return ""; 491 case MODIFIER_BIAS: return "_bias"; 492 case MODIFIER_BIAS_NEGATE: return "_bias"; 493 case MODIFIER_SIGN: return "_bx2"; 494 case MODIFIER_SIGN_NEGATE: return "_bx2"; 495 case MODIFIER_COMPLEMENT: return ""; 496 case MODIFIER_X2: return "_x2"; 497 case MODIFIER_X2_NEGATE: return "_x2"; 498 case MODIFIER_DZ: return "_dz"; 499 case MODIFIER_DW: return "_dw"; 500 case MODIFIER_ABS: return "_abs"; 501 case MODIFIER_ABS_NEGATE: return "_abs"; 502 case MODIFIER_NOT: return ""; 503 default: 504 ASSERT(false); 505 } 506 507 return ""; 508 } 509 string(ShaderType shaderType,unsigned short version) const510 std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const 511 { 512 if(type == PARAMETER_CONST && bufferIndex >= 0) 513 { 514 std::ostringstream buffer; 515 buffer << bufferIndex; 516 517 std::ostringstream offset; 518 offset << index; 519 520 return "cb" + buffer.str() + "[" + offset.str() + "]"; 521 } 522 else 523 { 524 return Parameter::string(shaderType, version); 525 } 526 } 527 swizzleString() const528 std::string Shader::SourceParameter::swizzleString() const 529 { 530 return Instruction::swizzleString(type, swizzle); 531 } 532 parseOperationToken(unsigned long token,unsigned char majorVersion)533 void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion) 534 { 535 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token 536 { 537 opcode = (Opcode)token; 538 539 control = CONTROL_RESERVED0; 540 predicate = false; 541 coissue = false; 542 } 543 else 544 { 545 opcode = (Opcode)(token & 0x0000FFFF); 546 control = (Control)((token & 0x00FF0000) >> 16); 547 548 int size = (token & 0x0F000000) >> 24; 549 550 predicate = (token & 0x10000000) != 0x00000000; 551 coissue = (token & 0x40000000) != 0x00000000; 552 553 if(majorVersion < 2) 554 { 555 if(size != 0) 556 { 557 ASSERT(false); // Reserved 558 } 559 } 560 561 if(majorVersion < 2) 562 { 563 if(predicate) 564 { 565 ASSERT(false); 566 } 567 } 568 569 if((token & 0x20000000) != 0x00000000) 570 { 571 ASSERT(false); // Reserved 572 } 573 574 if(majorVersion >= 2) 575 { 576 if(coissue) 577 { 578 ASSERT(false); // Reserved 579 } 580 } 581 582 if((token & 0x80000000) != 0x00000000) 583 { 584 ASSERT(false); 585 } 586 } 587 } 588 parseDeclarationToken(unsigned long token)589 void Shader::Instruction::parseDeclarationToken(unsigned long token) 590 { 591 samplerType = (SamplerType)((token & 0x78000000) >> 27); 592 usage = (Usage)(token & 0x0000001F); 593 usageIndex = (unsigned char)((token & 0x000F0000) >> 16); 594 } 595 parseDestinationToken(const unsigned long * token,unsigned char majorVersion)596 void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion) 597 { 598 dst.index = (unsigned short)(token[0] & 0x000007FF); 599 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 600 601 // TODO: Check type and index range 602 603 bool relative = (token[0] & 0x00002000) != 0x00000000; 604 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 605 dst.rel.swizzle = 0x00; 606 dst.rel.scale = 1; 607 608 if(relative && majorVersion >= 3) 609 { 610 dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 611 dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 612 } 613 else if(relative) ASSERT(false); // Reserved 614 615 if((token[0] & 0x0000C000) != 0x00000000) 616 { 617 ASSERT(false); // Reserved 618 } 619 620 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); 621 dst.saturate = (token[0] & 0x00100000) != 0; 622 dst.partialPrecision = (token[0] & 0x00200000) != 0; 623 dst.centroid = (token[0] & 0x00400000) != 0; 624 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; 625 626 if(majorVersion >= 2) 627 { 628 if(dst.shift) 629 { 630 ASSERT(false); // Reserved 631 } 632 } 633 634 if((token[0] & 0x80000000) != 0x80000000) 635 { 636 ASSERT(false); 637 } 638 } 639 parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)640 void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion) 641 { 642 // Defaults 643 src[i].index = 0; 644 src[i].type = PARAMETER_VOID; 645 src[i].modifier = MODIFIER_NONE; 646 src[i].swizzle = 0xE4; 647 src[i].rel.type = PARAMETER_VOID; 648 src[i].rel.swizzle = 0x00; 649 src[i].rel.scale = 1; 650 651 switch(opcode) 652 { 653 case OPCODE_DEF: 654 src[0].type = PARAMETER_FLOAT4LITERAL; 655 src[0].value[i] = *(float*)token; 656 break; 657 case OPCODE_DEFB: 658 src[0].type = PARAMETER_BOOL1LITERAL; 659 src[0].boolean[0] = *(int*)token; 660 break; 661 case OPCODE_DEFI: 662 src[0].type = PARAMETER_INT4LITERAL; 663 src[0].integer[i] = *(int*)token; 664 break; 665 default: 666 src[i].index = (unsigned short)(token[0] & 0x000007FF); 667 src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); 668 669 // FIXME: Check type and index range 670 671 bool relative = (token[0] & 0x00002000) != 0x00000000; 672 src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; 673 674 if((token[0] & 0x0000C000) != 0x00000000) 675 { 676 if(opcode != OPCODE_DEF && 677 opcode != OPCODE_DEFI && 678 opcode != OPCODE_DEFB) 679 { 680 ASSERT(false); 681 } 682 } 683 684 src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); 685 src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24); 686 687 if((token[0] & 0x80000000) != 0x80000000) 688 { 689 if(opcode != OPCODE_DEF && 690 opcode != OPCODE_DEFI && 691 opcode != OPCODE_DEFB) 692 { 693 ASSERT(false); 694 } 695 } 696 697 if(relative && majorVersion >= 2) 698 { 699 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); 700 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); 701 } 702 } 703 } 704 swizzleString(ParameterType type,unsigned char swizzle)705 std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle) 706 { 707 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4) 708 { 709 return ""; 710 } 711 712 int x = (swizzle & 0x03) >> 0; 713 int y = (swizzle & 0x0C) >> 2; 714 int z = (swizzle & 0x30) >> 4; 715 int w = (swizzle & 0xC0) >> 6; 716 717 std::string swizzleString = "."; 718 719 switch(x) 720 { 721 case 0: swizzleString += "x"; break; 722 case 1: swizzleString += "y"; break; 723 case 2: swizzleString += "z"; break; 724 case 3: swizzleString += "w"; break; 725 } 726 727 if(!(x == y && y == z && z == w)) 728 { 729 switch(y) 730 { 731 case 0: swizzleString += "x"; break; 732 case 1: swizzleString += "y"; break; 733 case 2: swizzleString += "z"; break; 734 case 3: swizzleString += "w"; break; 735 } 736 737 if(!(y == z && z == w)) 738 { 739 switch(z) 740 { 741 case 0: swizzleString += "x"; break; 742 case 1: swizzleString += "y"; break; 743 case 2: swizzleString += "z"; break; 744 case 3: swizzleString += "w"; break; 745 } 746 747 if(!(z == w)) 748 { 749 switch(w) 750 { 751 case 0: swizzleString += "x"; break; 752 case 1: swizzleString += "y"; break; 753 case 2: swizzleString += "z"; break; 754 case 3: swizzleString += "w"; break; 755 } 756 } 757 } 758 } 759 760 return swizzleString; 761 } 762 operationString(unsigned short version) const763 std::string Shader::Instruction::operationString(unsigned short version) const 764 { 765 switch(opcode) 766 { 767 case OPCODE_NULL: return "null"; 768 case OPCODE_NOP: return "nop"; 769 case OPCODE_MOV: return "mov"; 770 case OPCODE_ADD: return "add"; 771 case OPCODE_IADD: return "iadd"; 772 case OPCODE_SUB: return "sub"; 773 case OPCODE_ISUB: return "isub"; 774 case OPCODE_MAD: return "mad"; 775 case OPCODE_IMAD: return "imad"; 776 case OPCODE_MUL: return "mul"; 777 case OPCODE_IMUL: return "imul"; 778 case OPCODE_RCPX: return "rcpx"; 779 case OPCODE_DIV: return "div"; 780 case OPCODE_IDIV: return "idiv"; 781 case OPCODE_UDIV: return "udiv"; 782 case OPCODE_MOD: return "mod"; 783 case OPCODE_IMOD: return "imod"; 784 case OPCODE_UMOD: return "umod"; 785 case OPCODE_SHL: return "shl"; 786 case OPCODE_ISHR: return "ishr"; 787 case OPCODE_USHR: return "ushr"; 788 case OPCODE_RSQX: return "rsqx"; 789 case OPCODE_SQRT: return "sqrt"; 790 case OPCODE_RSQ: return "rsq"; 791 case OPCODE_LEN2: return "len2"; 792 case OPCODE_LEN3: return "len3"; 793 case OPCODE_LEN4: return "len4"; 794 case OPCODE_DIST1: return "dist1"; 795 case OPCODE_DIST2: return "dist2"; 796 case OPCODE_DIST3: return "dist3"; 797 case OPCODE_DIST4: return "dist4"; 798 case OPCODE_DP3: return "dp3"; 799 case OPCODE_DP4: return "dp4"; 800 case OPCODE_DET2: return "det2"; 801 case OPCODE_DET3: return "det3"; 802 case OPCODE_DET4: return "det4"; 803 case OPCODE_MIN: return "min"; 804 case OPCODE_IMIN: return "imin"; 805 case OPCODE_UMIN: return "umin"; 806 case OPCODE_MAX: return "max"; 807 case OPCODE_IMAX: return "imax"; 808 case OPCODE_UMAX: return "umax"; 809 case OPCODE_SLT: return "slt"; 810 case OPCODE_SGE: return "sge"; 811 case OPCODE_EXP2X: return "exp2x"; 812 case OPCODE_LOG2X: return "log2x"; 813 case OPCODE_LIT: return "lit"; 814 case OPCODE_ATT: return "att"; 815 case OPCODE_LRP: return "lrp"; 816 case OPCODE_STEP: return "step"; 817 case OPCODE_SMOOTH: return "smooth"; 818 case OPCODE_FLOATBITSTOINT: return "floatBitsToInt"; 819 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt"; 820 case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat"; 821 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat"; 822 case OPCODE_PACKSNORM2x16: return "packSnorm2x16"; 823 case OPCODE_PACKUNORM2x16: return "packUnorm2x16"; 824 case OPCODE_PACKHALF2x16: return "packHalf2x16"; 825 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16"; 826 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16"; 827 case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16"; 828 case OPCODE_FRC: return "frc"; 829 case OPCODE_M4X4: return "m4x4"; 830 case OPCODE_M4X3: return "m4x3"; 831 case OPCODE_M3X4: return "m3x4"; 832 case OPCODE_M3X3: return "m3x3"; 833 case OPCODE_M3X2: return "m3x2"; 834 case OPCODE_CALL: return "call"; 835 case OPCODE_CALLNZ: return "callnz"; 836 case OPCODE_LOOP: return "loop"; 837 case OPCODE_RET: return "ret"; 838 case OPCODE_ENDLOOP: return "endloop"; 839 case OPCODE_LABEL: return "label"; 840 case OPCODE_DCL: return "dcl"; 841 case OPCODE_POWX: return "powx"; 842 case OPCODE_CRS: return "crs"; 843 case OPCODE_SGN: return "sgn"; 844 case OPCODE_ISGN: return "isgn"; 845 case OPCODE_ABS: return "abs"; 846 case OPCODE_IABS: return "iabs"; 847 case OPCODE_NRM2: return "nrm2"; 848 case OPCODE_NRM3: return "nrm3"; 849 case OPCODE_NRM4: return "nrm4"; 850 case OPCODE_SINCOS: return "sincos"; 851 case OPCODE_REP: return "rep"; 852 case OPCODE_ENDREP: return "endrep"; 853 case OPCODE_IF: return "if"; 854 case OPCODE_IFC: return "ifc"; 855 case OPCODE_ELSE: return "else"; 856 case OPCODE_ENDIF: return "endif"; 857 case OPCODE_BREAK: return "break"; 858 case OPCODE_BREAKC: return "breakc"; 859 case OPCODE_MOVA: return "mova"; 860 case OPCODE_DEFB: return "defb"; 861 case OPCODE_DEFI: return "defi"; 862 case OPCODE_TEXCOORD: return "texcoord"; 863 case OPCODE_TEXKILL: return "texkill"; 864 case OPCODE_DISCARD: return "discard"; 865 case OPCODE_TEX: 866 if(version < 0x0104) return "tex"; 867 else return "texld"; 868 case OPCODE_TEXBEM: return "texbem"; 869 case OPCODE_TEXBEML: return "texbeml"; 870 case OPCODE_TEXREG2AR: return "texreg2ar"; 871 case OPCODE_TEXREG2GB: return "texreg2gb"; 872 case OPCODE_TEXM3X2PAD: return "texm3x2pad"; 873 case OPCODE_TEXM3X2TEX: return "texm3x2tex"; 874 case OPCODE_TEXM3X3PAD: return "texm3x3pad"; 875 case OPCODE_TEXM3X3TEX: return "texm3x3tex"; 876 case OPCODE_RESERVED0: return "reserved0"; 877 case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; 878 case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; 879 case OPCODE_EXPP: return "expp"; 880 case OPCODE_LOGP: return "logp"; 881 case OPCODE_CND: return "cnd"; 882 case OPCODE_DEF: return "def"; 883 case OPCODE_TEXREG2RGB: return "texreg2rgb"; 884 case OPCODE_TEXDP3TEX: return "texdp3tex"; 885 case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; 886 case OPCODE_TEXDP3: return "texdp3"; 887 case OPCODE_TEXM3X3: return "texm3x3"; 888 case OPCODE_TEXDEPTH: return "texdepth"; 889 case OPCODE_CMP0: return "cmp0"; 890 case OPCODE_ICMP: return "icmp"; 891 case OPCODE_UCMP: return "ucmp"; 892 case OPCODE_SELECT: return "select"; 893 case OPCODE_EXTRACT: return "extract"; 894 case OPCODE_INSERT: return "insert"; 895 case OPCODE_BEM: return "bem"; 896 case OPCODE_DP2ADD: return "dp2add"; 897 case OPCODE_DFDX: return "dFdx"; 898 case OPCODE_DFDY: return "dFdy"; 899 case OPCODE_FWIDTH: return "fwidth"; 900 case OPCODE_TEXLDD: return "texldd"; 901 case OPCODE_CMP: return "cmp"; 902 case OPCODE_TEXLDL: return "texldl"; 903 case OPCODE_TEXBIAS: return "texbias"; 904 case OPCODE_TEXOFFSET: return "texoffset"; 905 case OPCODE_TEXOFFSETBIAS: return "texoffsetbias"; 906 case OPCODE_TEXLOD: return "texlod"; 907 case OPCODE_TEXLODOFFSET: return "texlodoffset"; 908 case OPCODE_TEXELFETCH: return "texelfetch"; 909 case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset"; 910 case OPCODE_TEXGRAD: return "texgrad"; 911 case OPCODE_TEXGRADOFFSET: return "texgradoffset"; 912 case OPCODE_BREAKP: return "breakp"; 913 case OPCODE_TEXSIZE: return "texsize"; 914 case OPCODE_PHASE: return "phase"; 915 case OPCODE_COMMENT: return "comment"; 916 case OPCODE_END: return "end"; 917 case OPCODE_PS_1_0: return "ps_1_0"; 918 case OPCODE_PS_1_1: return "ps_1_1"; 919 case OPCODE_PS_1_2: return "ps_1_2"; 920 case OPCODE_PS_1_3: return "ps_1_3"; 921 case OPCODE_PS_1_4: return "ps_1_4"; 922 case OPCODE_PS_2_0: return "ps_2_0"; 923 case OPCODE_PS_2_x: return "ps_2_x"; 924 case OPCODE_PS_3_0: return "ps_3_0"; 925 case OPCODE_VS_1_0: return "vs_1_0"; 926 case OPCODE_VS_1_1: return "vs_1_1"; 927 case OPCODE_VS_2_0: return "vs_2_0"; 928 case OPCODE_VS_2_x: return "vs_2_x"; 929 case OPCODE_VS_2_sw: return "vs_2_sw"; 930 case OPCODE_VS_3_0: return "vs_3_0"; 931 case OPCODE_VS_3_sw: return "vs_3_sw"; 932 case OPCODE_WHILE: return "while"; 933 case OPCODE_ENDWHILE: return "endwhile"; 934 case OPCODE_COS: return "cos"; 935 case OPCODE_SIN: return "sin"; 936 case OPCODE_TAN: return "tan"; 937 case OPCODE_ACOS: return "acos"; 938 case OPCODE_ASIN: return "asin"; 939 case OPCODE_ATAN: return "atan"; 940 case OPCODE_ATAN2: return "atan2"; 941 case OPCODE_COSH: return "cosh"; 942 case OPCODE_SINH: return "sinh"; 943 case OPCODE_TANH: return "tanh"; 944 case OPCODE_ACOSH: return "acosh"; 945 case OPCODE_ASINH: return "asinh"; 946 case OPCODE_ATANH: return "atanh"; 947 case OPCODE_DP1: return "dp1"; 948 case OPCODE_DP2: return "dp2"; 949 case OPCODE_TRUNC: return "trunc"; 950 case OPCODE_FLOOR: return "floor"; 951 case OPCODE_ROUND: return "round"; 952 case OPCODE_ROUNDEVEN: return "roundEven"; 953 case OPCODE_CEIL: return "ceil"; 954 case OPCODE_EXP2: return "exp2"; 955 case OPCODE_LOG2: return "log2"; 956 case OPCODE_EXP: return "exp"; 957 case OPCODE_LOG: return "log"; 958 case OPCODE_POW: return "pow"; 959 case OPCODE_F2B: return "f2b"; 960 case OPCODE_B2F: return "b2f"; 961 case OPCODE_F2I: return "f2i"; 962 case OPCODE_I2F: return "i2f"; 963 case OPCODE_F2U: return "f2u"; 964 case OPCODE_U2F: return "u2f"; 965 case OPCODE_B2I: return "b2i"; 966 case OPCODE_I2B: return "i2b"; 967 case OPCODE_ALL: return "all"; 968 case OPCODE_ANY: return "any"; 969 case OPCODE_NEG: return "neg"; 970 case OPCODE_INEG: return "ineg"; 971 case OPCODE_ISNAN: return "isnan"; 972 case OPCODE_ISINF: return "isinf"; 973 case OPCODE_NOT: return "not"; 974 case OPCODE_OR: return "or"; 975 case OPCODE_XOR: return "xor"; 976 case OPCODE_AND: return "and"; 977 case OPCODE_EQ: return "eq"; 978 case OPCODE_NE: return "neq"; 979 case OPCODE_FORWARD1: return "forward1"; 980 case OPCODE_FORWARD2: return "forward2"; 981 case OPCODE_FORWARD3: return "forward3"; 982 case OPCODE_FORWARD4: return "forward4"; 983 case OPCODE_REFLECT1: return "reflect1"; 984 case OPCODE_REFLECT2: return "reflect2"; 985 case OPCODE_REFLECT3: return "reflect3"; 986 case OPCODE_REFLECT4: return "reflect4"; 987 case OPCODE_REFRACT1: return "refract1"; 988 case OPCODE_REFRACT2: return "refract2"; 989 case OPCODE_REFRACT3: return "refract3"; 990 case OPCODE_REFRACT4: return "refract4"; 991 case OPCODE_LEAVE: return "leave"; 992 case OPCODE_CONTINUE: return "continue"; 993 case OPCODE_TEST: return "test"; 994 case OPCODE_SWITCH: return "switch"; 995 case OPCODE_ENDSWITCH: return "endswitch"; 996 case OPCODE_SCALAR: return "scalar"; 997 } 998 999 return "<unknown>"; 1000 } 1001 controlString() const1002 std::string Shader::Instruction::controlString() const 1003 { 1004 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP) 1005 { 1006 if(project) return "p"; 1007 1008 if(bias) return "b"; 1009 1010 // FIXME: LOD 1011 } 1012 1013 switch(control) 1014 { 1015 case 1: return "_gt"; 1016 case 2: return "_eq"; 1017 case 3: return "_ge"; 1018 case 4: return "_lt"; 1019 case 5: return "_ne"; 1020 case 6: return "_le"; 1021 default: 1022 return ""; 1023 // ASSERT(false); // FIXME 1024 } 1025 } 1026 string(ShaderType shaderType,unsigned short version) const1027 std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const 1028 { 1029 std::ostringstream buffer; 1030 1031 if(type == PARAMETER_FLOAT4LITERAL) 1032 { 1033 buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}'; 1034 1035 return buffer.str(); 1036 } 1037 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) 1038 { 1039 buffer << index; 1040 1041 return typeString(shaderType, version) + buffer.str(); 1042 } 1043 else 1044 { 1045 return typeString(shaderType, version); 1046 } 1047 } 1048 typeString(ShaderType shaderType,unsigned short version) const1049 std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const 1050 { 1051 switch(type) 1052 { 1053 case PARAMETER_TEMP: return "r"; 1054 case PARAMETER_INPUT: return "v"; 1055 case PARAMETER_CONST: return "c"; 1056 case PARAMETER_TEXTURE: 1057 // case PARAMETER_ADDR: 1058 if(shaderType == SHADER_PIXEL) return "t"; 1059 else return "a0"; 1060 case PARAMETER_RASTOUT: 1061 if(index == 0) return "oPos"; 1062 else if(index == 1) return "oFog"; 1063 else if(index == 2) return "oPts"; 1064 else ASSERT(false); 1065 case PARAMETER_ATTROUT: return "oD"; 1066 case PARAMETER_TEXCRDOUT: 1067 // case PARAMETER_OUTPUT: return ""; 1068 if(version < 0x0300) return "oT"; 1069 else return "o"; 1070 case PARAMETER_CONSTINT: return "i"; 1071 case PARAMETER_COLOROUT: return "oC"; 1072 case PARAMETER_DEPTHOUT: return "oDepth"; 1073 case PARAMETER_SAMPLER: return "s"; 1074 // case PARAMETER_CONST2: return ""; 1075 // case PARAMETER_CONST3: return ""; 1076 // case PARAMETER_CONST4: return ""; 1077 case PARAMETER_CONSTBOOL: return "b"; 1078 case PARAMETER_LOOP: return "aL"; 1079 // case PARAMETER_TEMPFLOAT16: return ""; 1080 case PARAMETER_MISCTYPE: 1081 switch(index) 1082 { 1083 case VPosIndex: return "vPos"; 1084 case VFaceIndex: return "vFace"; 1085 case InstanceIDIndex: return "iID"; 1086 case VertexIDIndex: return "vID"; 1087 default: ASSERT(false); 1088 } 1089 case PARAMETER_LABEL: return "l"; 1090 case PARAMETER_PREDICATE: return "p0"; 1091 case PARAMETER_FLOAT4LITERAL: return ""; 1092 case PARAMETER_BOOL1LITERAL: return ""; 1093 case PARAMETER_INT4LITERAL: return ""; 1094 // case PARAMETER_VOID: return ""; 1095 default: 1096 ASSERT(false); 1097 } 1098 1099 return ""; 1100 } 1101 isBranch() const1102 bool Shader::Instruction::isBranch() const 1103 { 1104 return opcode == OPCODE_IF || opcode == OPCODE_IFC; 1105 } 1106 isCall() const1107 bool Shader::Instruction::isCall() const 1108 { 1109 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ; 1110 } 1111 isBreak() const1112 bool Shader::Instruction::isBreak() const 1113 { 1114 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP; 1115 } 1116 isLoop() const1117 bool Shader::Instruction::isLoop() const 1118 { 1119 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE; 1120 } 1121 isEndLoop() const1122 bool Shader::Instruction::isEndLoop() const 1123 { 1124 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE; 1125 } 1126 isPredicated() const1127 bool Shader::Instruction::isPredicated() const 1128 { 1129 return predicate || 1130 analysisBranch || 1131 analysisBreak || 1132 analysisContinue || 1133 analysisLeave; 1134 } 1135 Shader()1136 Shader::Shader() : serialID(serialCounter++) 1137 { 1138 usedSamplers = 0; 1139 } 1140 ~Shader()1141 Shader::~Shader() 1142 { 1143 for(auto &inst : instruction) 1144 { 1145 delete inst; 1146 inst = 0; 1147 } 1148 } 1149 parse(const unsigned long * token)1150 void Shader::parse(const unsigned long *token) 1151 { 1152 minorVersion = (unsigned char)(token[0] & 0x000000FF); 1153 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); 1154 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); 1155 1156 int length = 0; 1157 1158 if(shaderType == SHADER_VERTEX) 1159 { 1160 length = VertexShader::validate(token); 1161 } 1162 else if(shaderType == SHADER_PIXEL) 1163 { 1164 length = PixelShader::validate(token); 1165 } 1166 else ASSERT(false); 1167 1168 ASSERT(length != 0); 1169 instruction.resize(length); 1170 1171 for(int i = 0; i < length; i++) 1172 { 1173 while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token 1174 { 1175 int length = (*token & 0x7FFF0000) >> 16; 1176 1177 token += length + 1; 1178 } 1179 1180 int tokenCount = size(*token); 1181 1182 instruction[i] = new Instruction(token, tokenCount, majorVersion); 1183 1184 token += 1 + tokenCount; 1185 } 1186 } 1187 size(unsigned long opcode) const1188 int Shader::size(unsigned long opcode) const 1189 { 1190 return size(opcode, shaderModel); 1191 } 1192 size(unsigned long opcode,unsigned short shaderModel)1193 int Shader::size(unsigned long opcode, unsigned short shaderModel) 1194 { 1195 if(shaderModel > 0x0300) 1196 { 1197 ASSERT(false); 1198 } 1199 1200 static const signed char size[] = 1201 { 1202 0, // NOP = 0 1203 2, // MOV 1204 3, // ADD 1205 3, // SUB 1206 4, // MAD 1207 3, // MUL 1208 2, // RCP 1209 2, // RSQ 1210 3, // DP3 1211 3, // DP4 1212 3, // MIN 1213 3, // MAX 1214 3, // SLT 1215 3, // SGE 1216 2, // EXP 1217 2, // LOG 1218 2, // LIT 1219 3, // DST 1220 4, // LRP 1221 2, // FRC 1222 3, // M4x4 1223 3, // M4x3 1224 3, // M3x4 1225 3, // M3x3 1226 3, // M3x2 1227 1, // CALL 1228 2, // CALLNZ 1229 2, // LOOP 1230 0, // RET 1231 0, // ENDLOOP 1232 1, // LABEL 1233 2, // DCL 1234 3, // POW 1235 3, // CRS 1236 4, // SGN 1237 2, // ABS 1238 2, // NRM 1239 4, // SINCOS 1240 1, // REP 1241 0, // ENDREP 1242 1, // IF 1243 2, // IFC 1244 0, // ELSE 1245 0, // ENDIF 1246 0, // BREAK 1247 2, // BREAKC 1248 2, // MOVA 1249 2, // DEFB 1250 5, // DEFI 1251 -1, // 49 1252 -1, // 50 1253 -1, // 51 1254 -1, // 52 1255 -1, // 53 1256 -1, // 54 1257 -1, // 55 1258 -1, // 56 1259 -1, // 57 1260 -1, // 58 1261 -1, // 59 1262 -1, // 60 1263 -1, // 61 1264 -1, // 62 1265 -1, // 63 1266 1, // TEXCOORD = 64 1267 1, // TEXKILL 1268 1, // TEX 1269 2, // TEXBEM 1270 2, // TEXBEML 1271 2, // TEXREG2AR 1272 2, // TEXREG2GB 1273 2, // TEXM3x2PAD 1274 2, // TEXM3x2TEX 1275 2, // TEXM3x3PAD 1276 2, // TEXM3x3TEX 1277 -1, // RESERVED0 1278 3, // TEXM3x3SPEC 1279 2, // TEXM3x3VSPEC 1280 2, // EXPP 1281 2, // LOGP 1282 4, // CND 1283 5, // DEF 1284 2, // TEXREG2RGB 1285 2, // TEXDP3TEX 1286 2, // TEXM3x2DEPTH 1287 2, // TEXDP3 1288 2, // TEXM3x3 1289 1, // TEXDEPTH 1290 4, // CMP 1291 3, // BEM 1292 4, // DP2ADD 1293 2, // DSX 1294 2, // DSY 1295 5, // TEXLDD 1296 3, // SETP 1297 3, // TEXLDL 1298 2, // BREAKP 1299 -1, // 97 1300 -1, // 98 1301 -1, // 99 1302 -1, // 100 1303 -1, // 101 1304 -1, // 102 1305 -1, // 103 1306 -1, // 104 1307 -1, // 105 1308 -1, // 106 1309 -1, // 107 1310 -1, // 108 1311 -1, // 109 1312 -1, // 110 1313 -1, // 111 1314 -1, // 112 1315 }; 1316 1317 int length = 0; 1318 1319 if((opcode & 0x0000FFFF) == OPCODE_COMMENT) 1320 { 1321 return (opcode & 0x7FFF0000) >> 16; 1322 } 1323 1324 if(opcode != OPCODE_PS_1_0 && 1325 opcode != OPCODE_PS_1_1 && 1326 opcode != OPCODE_PS_1_2 && 1327 opcode != OPCODE_PS_1_3 && 1328 opcode != OPCODE_PS_1_4 && 1329 opcode != OPCODE_PS_2_0 && 1330 opcode != OPCODE_PS_2_x && 1331 opcode != OPCODE_PS_3_0 && 1332 opcode != OPCODE_VS_1_0 && 1333 opcode != OPCODE_VS_1_1 && 1334 opcode != OPCODE_VS_2_0 && 1335 opcode != OPCODE_VS_2_x && 1336 opcode != OPCODE_VS_2_sw && 1337 opcode != OPCODE_VS_3_0 && 1338 opcode != OPCODE_VS_3_sw && 1339 opcode != OPCODE_PHASE && 1340 opcode != OPCODE_END) 1341 { 1342 if(shaderModel >= 0x0200) 1343 { 1344 length = (opcode & 0x0F000000) >> 24; 1345 } 1346 else 1347 { 1348 length = size[opcode & 0x0000FFFF]; 1349 } 1350 } 1351 1352 if(length < 0) 1353 { 1354 ASSERT(false); 1355 } 1356 1357 if(shaderModel == 0x0104) 1358 { 1359 switch(opcode & 0x0000FFFF) 1360 { 1361 case OPCODE_TEX: 1362 length += 1; 1363 break; 1364 case OPCODE_TEXCOORD: 1365 length += 1; 1366 break; 1367 default: 1368 break; 1369 } 1370 } 1371 1372 return length; 1373 } 1374 maskContainsComponent(int mask,int component)1375 bool Shader::maskContainsComponent(int mask, int component) 1376 { 1377 return (mask & (1 << component)) != 0; 1378 } 1379 swizzleContainsComponent(int swizzle,int component)1380 bool Shader::swizzleContainsComponent(int swizzle, int component) 1381 { 1382 if((swizzle & 0x03) >> 0 == component) return true; 1383 if((swizzle & 0x0C) >> 2 == component) return true; 1384 if((swizzle & 0x30) >> 4 == component) return true; 1385 if((swizzle & 0xC0) >> 6 == component) return true; 1386 1387 return false; 1388 } 1389 swizzleContainsComponentMasked(int swizzle,int component,int mask)1390 bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask) 1391 { 1392 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true; 1393 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true; 1394 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true; 1395 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true; 1396 1397 return false; 1398 } 1399 containsDynamicBranching() const1400 bool Shader::containsDynamicBranching() const 1401 { 1402 return dynamicBranching; 1403 } 1404 containsBreakInstruction() const1405 bool Shader::containsBreakInstruction() const 1406 { 1407 return containsBreak; 1408 } 1409 containsContinueInstruction() const1410 bool Shader::containsContinueInstruction() const 1411 { 1412 return containsContinue; 1413 } 1414 containsLeaveInstruction() const1415 bool Shader::containsLeaveInstruction() const 1416 { 1417 return containsLeave; 1418 } 1419 containsDefineInstruction() const1420 bool Shader::containsDefineInstruction() const 1421 { 1422 return containsDefine; 1423 } 1424 usesSampler(int index) const1425 bool Shader::usesSampler(int index) const 1426 { 1427 return (usedSamplers & (1 << index)) != 0; 1428 } 1429 getSerialID() const1430 int Shader::getSerialID() const 1431 { 1432 return serialID; 1433 } 1434 getLength() const1435 size_t Shader::getLength() const 1436 { 1437 return instruction.size(); 1438 } 1439 getShaderType() const1440 Shader::ShaderType Shader::getShaderType() const 1441 { 1442 return shaderType; 1443 } 1444 getShaderModel() const1445 unsigned short Shader::getShaderModel() const 1446 { 1447 return shaderModel; 1448 } 1449 print(const char * fileName,...) const1450 void Shader::print(const char *fileName, ...) const 1451 { 1452 char fullName[1024 + 1]; 1453 1454 va_list vararg; 1455 va_start(vararg, fileName); 1456 vsnprintf(fullName, 1024, fileName, vararg); 1457 va_end(vararg); 1458 1459 std::ofstream file(fullName, std::ofstream::out); 1460 1461 for(const auto &inst : instruction) 1462 { 1463 file << inst->string(shaderType, shaderModel) << std::endl; 1464 } 1465 } 1466 printInstruction(int index,const char * fileName) const1467 void Shader::printInstruction(int index, const char *fileName) const 1468 { 1469 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app); 1470 1471 file << instruction[index]->string(shaderType, shaderModel) << std::endl; 1472 } 1473 append(Instruction * instruction)1474 void Shader::append(Instruction *instruction) 1475 { 1476 this->instruction.push_back(instruction); 1477 } 1478 declareSampler(int i)1479 void Shader::declareSampler(int i) 1480 { 1481 if(i >= 0 && i < 16) 1482 { 1483 usedSamplers |= 1 << i; 1484 } 1485 } 1486 getInstruction(size_t i) const1487 const Shader::Instruction *Shader::getInstruction(size_t i) const 1488 { 1489 ASSERT(i < instruction.size()); 1490 1491 return instruction[i]; 1492 } 1493 optimize()1494 void Shader::optimize() 1495 { 1496 optimizeLeave(); 1497 optimizeCall(); 1498 removeNull(); 1499 } 1500 optimizeLeave()1501 void Shader::optimizeLeave() 1502 { 1503 // A return (leave) right before the end of a function or the shader can be removed 1504 for(unsigned int i = 0; i < instruction.size(); i++) 1505 { 1506 if(instruction[i]->opcode == OPCODE_LEAVE) 1507 { 1508 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET) 1509 { 1510 instruction[i]->opcode = OPCODE_NULL; 1511 } 1512 } 1513 } 1514 } 1515 optimizeCall()1516 void Shader::optimizeCall() 1517 { 1518 // Eliminate uncalled functions 1519 std::set<int> calledFunctions; 1520 bool rescan = true; 1521 1522 while(rescan) 1523 { 1524 calledFunctions.clear(); 1525 rescan = false; 1526 1527 for(const auto &inst : instruction) 1528 { 1529 if(inst->isCall()) 1530 { 1531 calledFunctions.insert(inst->dst.label); 1532 } 1533 } 1534 1535 if(!calledFunctions.empty()) 1536 { 1537 for(unsigned int i = 0; i < instruction.size(); i++) 1538 { 1539 if(instruction[i]->opcode == OPCODE_LABEL) 1540 { 1541 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end()) 1542 { 1543 for( ; i < instruction.size(); i++) 1544 { 1545 Opcode oldOpcode = instruction[i]->opcode; 1546 instruction[i]->opcode = OPCODE_NULL; 1547 1548 if(oldOpcode == OPCODE_RET) 1549 { 1550 rescan = true; 1551 break; 1552 } 1553 } 1554 } 1555 } 1556 } 1557 } 1558 } 1559 1560 // Optimize the entry call 1561 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET) 1562 { 1563 if(calledFunctions.size() == 1) 1564 { 1565 instruction[0]->opcode = OPCODE_NULL; 1566 instruction[1]->opcode = OPCODE_NULL; 1567 1568 for(size_t i = 2; i < instruction.size(); i++) 1569 { 1570 if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET) 1571 { 1572 instruction[i]->opcode = OPCODE_NULL; 1573 } 1574 } 1575 } 1576 } 1577 } 1578 removeNull()1579 void Shader::removeNull() 1580 { 1581 size_t size = 0; 1582 for(size_t i = 0; i < instruction.size(); i++) 1583 { 1584 if(instruction[i]->opcode != OPCODE_NULL) 1585 { 1586 instruction[size] = instruction[i]; 1587 size++; 1588 } 1589 else 1590 { 1591 delete instruction[i]; 1592 } 1593 } 1594 1595 instruction.resize(size); 1596 } 1597 analyzeDirtyConstants()1598 void Shader::analyzeDirtyConstants() 1599 { 1600 dirtyConstantsF = 0; 1601 dirtyConstantsI = 0; 1602 dirtyConstantsB = 0; 1603 1604 for(const auto &inst : instruction) 1605 { 1606 switch(inst->opcode) 1607 { 1608 case OPCODE_DEF: 1609 if(inst->dst.index + 1 > dirtyConstantsF) 1610 { 1611 dirtyConstantsF = inst->dst.index + 1; 1612 } 1613 break; 1614 case OPCODE_DEFI: 1615 if(inst->dst.index + 1 > dirtyConstantsI) 1616 { 1617 dirtyConstantsI = inst->dst.index + 1; 1618 } 1619 break; 1620 case OPCODE_DEFB: 1621 if(inst->dst.index + 1 > dirtyConstantsB) 1622 { 1623 dirtyConstantsB = inst->dst.index + 1; 1624 } 1625 break; 1626 default: 1627 break; 1628 } 1629 } 1630 } 1631 analyzeDynamicBranching()1632 void Shader::analyzeDynamicBranching() 1633 { 1634 dynamicBranching = false; 1635 containsLeave = false; 1636 containsBreak = false; 1637 containsContinue = false; 1638 containsDefine = false; 1639 1640 // Determine global presence of branching instructions 1641 for(const auto &inst : instruction) 1642 { 1643 switch(inst->opcode) 1644 { 1645 case OPCODE_CALLNZ: 1646 case OPCODE_IF: 1647 case OPCODE_IFC: 1648 case OPCODE_BREAK: 1649 case OPCODE_BREAKC: 1650 case OPCODE_CMP: 1651 case OPCODE_BREAKP: 1652 case OPCODE_LEAVE: 1653 case OPCODE_CONTINUE: 1654 if(inst->src[0].type != PARAMETER_CONSTBOOL) 1655 { 1656 dynamicBranching = true; 1657 } 1658 1659 if(inst->opcode == OPCODE_LEAVE) 1660 { 1661 containsLeave = true; 1662 } 1663 1664 if(inst->isBreak()) 1665 { 1666 containsBreak = true; 1667 } 1668 1669 if(inst->opcode == OPCODE_CONTINUE) 1670 { 1671 containsContinue = true; 1672 } 1673 case OPCODE_DEF: 1674 case OPCODE_DEFB: 1675 case OPCODE_DEFI: 1676 containsDefine = true; 1677 default: 1678 break; 1679 } 1680 } 1681 1682 // Conservatively determine which instructions are affected by dynamic branching 1683 int branchDepth = 0; 1684 int breakDepth = 0; 1685 int continueDepth = 0; 1686 bool leaveReturn = false; 1687 unsigned int functionBegin = 0; 1688 1689 for(unsigned int i = 0; i < instruction.size(); i++) 1690 { 1691 // If statements and loops 1692 if(instruction[i]->isBranch() || instruction[i]->isLoop()) 1693 { 1694 branchDepth++; 1695 } 1696 else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop()) 1697 { 1698 branchDepth--; 1699 } 1700 1701 if(branchDepth > 0) 1702 { 1703 instruction[i]->analysisBranch = true; 1704 1705 if(instruction[i]->isCall()) 1706 { 1707 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1708 } 1709 } 1710 1711 // Break statemement 1712 if(instruction[i]->isBreak()) 1713 { 1714 breakDepth++; 1715 } 1716 1717 if(breakDepth > 0) 1718 { 1719 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask 1720 { 1721 breakDepth++; 1722 } 1723 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) 1724 { 1725 breakDepth--; 1726 } 1727 1728 instruction[i]->analysisBreak = true; 1729 1730 if(instruction[i]->isCall()) 1731 { 1732 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); 1733 } 1734 } 1735 1736 // Continue statement 1737 if(instruction[i]->opcode == OPCODE_CONTINUE) 1738 { 1739 continueDepth++; 1740 } 1741 1742 if(continueDepth > 0) 1743 { 1744 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask 1745 { 1746 continueDepth++; 1747 } 1748 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) 1749 { 1750 continueDepth--; 1751 } 1752 1753 instruction[i]->analysisContinue = true; 1754 1755 if(instruction[i]->isCall()) 1756 { 1757 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE); 1758 } 1759 } 1760 1761 // Return (leave) statement 1762 if(instruction[i]->opcode == OPCODE_LEAVE) 1763 { 1764 leaveReturn = true; 1765 1766 // Mark loop body instructions prior to the return statement 1767 for(unsigned int l = functionBegin; l < i; l++) 1768 { 1769 if(instruction[l]->isLoop()) 1770 { 1771 for(unsigned int r = l + 1; r < i; r++) 1772 { 1773 instruction[r]->analysisLeave = true; 1774 } 1775 1776 break; 1777 } 1778 } 1779 } 1780 else if(instruction[i]->opcode == OPCODE_RET) // End of the function 1781 { 1782 leaveReturn = false; 1783 } 1784 else if(instruction[i]->opcode == OPCODE_LABEL) 1785 { 1786 functionBegin = i; 1787 } 1788 1789 if(leaveReturn) 1790 { 1791 instruction[i]->analysisLeave = true; 1792 1793 if(instruction[i]->isCall()) 1794 { 1795 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE); 1796 } 1797 } 1798 } 1799 } 1800 markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1801 void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag) 1802 { 1803 bool marker = false; 1804 for(auto &inst : instruction) 1805 { 1806 if(!marker) 1807 { 1808 if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel) 1809 { 1810 marker = true; 1811 } 1812 } 1813 else 1814 { 1815 if(inst->opcode == OPCODE_RET) 1816 { 1817 break; 1818 } 1819 else if(inst->isCall()) 1820 { 1821 markFunctionAnalysis(inst->dst.label, flag); 1822 } 1823 1824 inst->analysis |= flag; 1825 } 1826 } 1827 } 1828 analyzeSamplers()1829 void Shader::analyzeSamplers() 1830 { 1831 for(const auto &inst : instruction) 1832 { 1833 switch(inst->opcode) 1834 { 1835 case OPCODE_TEX: 1836 case OPCODE_TEXBEM: 1837 case OPCODE_TEXBEML: 1838 case OPCODE_TEXREG2AR: 1839 case OPCODE_TEXREG2GB: 1840 case OPCODE_TEXM3X2TEX: 1841 case OPCODE_TEXM3X3TEX: 1842 case OPCODE_TEXM3X3SPEC: 1843 case OPCODE_TEXM3X3VSPEC: 1844 case OPCODE_TEXREG2RGB: 1845 case OPCODE_TEXDP3TEX: 1846 case OPCODE_TEXM3X2DEPTH: 1847 case OPCODE_TEXLDD: 1848 case OPCODE_TEXLDL: 1849 case OPCODE_TEXLOD: 1850 case OPCODE_TEXOFFSET: 1851 case OPCODE_TEXOFFSETBIAS: 1852 case OPCODE_TEXLODOFFSET: 1853 case OPCODE_TEXELFETCH: 1854 case OPCODE_TEXELFETCHOFFSET: 1855 case OPCODE_TEXGRAD: 1856 case OPCODE_TEXGRADOFFSET: 1857 { 1858 Parameter &dst = inst->dst; 1859 Parameter &src1 = inst->src[1]; 1860 1861 if(majorVersion >= 2) 1862 { 1863 if(src1.type == PARAMETER_SAMPLER) 1864 { 1865 usedSamplers |= 1 << src1.index; 1866 } 1867 } 1868 else 1869 { 1870 usedSamplers |= 1 << dst.index; 1871 } 1872 } 1873 break; 1874 default: 1875 break; 1876 } 1877 } 1878 } 1879 1880 // Assigns a unique index to each call instruction, on a per label basis. 1881 // This is used to know what basic block to return to. analyzeCallSites()1882 void Shader::analyzeCallSites() 1883 { 1884 std::unordered_map<int, int> callSiteIndices; 1885 1886 for(auto &inst : instruction) 1887 { 1888 if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ) 1889 { 1890 inst->dst.callSite = callSiteIndices[inst->dst.label]++; 1891 } 1892 } 1893 } 1894 analyzeIndirectAddressing()1895 void Shader::analyzeIndirectAddressing() 1896 { 1897 indirectAddressableTemporaries = false; 1898 indirectAddressableInput = false; 1899 indirectAddressableOutput = false; 1900 1901 for(const auto &inst : instruction) 1902 { 1903 if(inst->dst.rel.type != PARAMETER_VOID) 1904 { 1905 switch(inst->dst.type) 1906 { 1907 case PARAMETER_TEMP: indirectAddressableTemporaries = true; break; 1908 case PARAMETER_INPUT: indirectAddressableInput = true; break; 1909 case PARAMETER_OUTPUT: indirectAddressableOutput = true; break; 1910 default: break; 1911 } 1912 } 1913 1914 for(int j = 0; j < 3; j++) 1915 { 1916 if(inst->src[j].rel.type != PARAMETER_VOID) 1917 { 1918 switch(inst->src[j].type) 1919 { 1920 case PARAMETER_TEMP: indirectAddressableTemporaries = true; break; 1921 case PARAMETER_INPUT: indirectAddressableInput = true; break; 1922 case PARAMETER_OUTPUT: indirectAddressableOutput = true; break; 1923 default: break; 1924 } 1925 } 1926 } 1927 } 1928 } 1929 1930 // analyzeLimits analyzes the whole shader program to determine the deepest 1931 // nesting of control flow blocks and function calls. These calculations 1932 // are stored into the limits member, and is used by the programs to 1933 // allocate stack storage variables. analyzeLimits()1934 void Shader::analyzeLimits() 1935 { 1936 typedef unsigned int FunctionID; 1937 1938 // Identifier of the function with the main entry point. 1939 constexpr FunctionID MAIN_ID = 0xF0000000; 1940 1941 // Invalid function identifier. 1942 constexpr FunctionID INVALID_ID = ~0U; 1943 1944 // Limits on a single function. 1945 struct FunctionLimits 1946 { 1947 uint32_t loops = 0; // maximum nested loop and reps. 1948 uint32_t ifs = 0; // maximum nested if statements. 1949 uint32_t stack = 0; // maximum call depth. 1950 }; 1951 1952 // Information about a single function in the shader. 1953 struct FunctionInfo 1954 { 1955 FunctionLimits limits; 1956 std::unordered_set<FunctionID> calls; // What this function calls. 1957 bool reachable; // Is this function reachable? 1958 }; 1959 1960 std::unordered_map<FunctionID, FunctionInfo> functions; 1961 1962 uint32_t maxLabel = 0; // Highest label found 1963 1964 // Add a definition for the main entry point. 1965 // This starts at the beginning of the instructions and does not have 1966 // its own label. 1967 functions[MAIN_ID] = FunctionInfo(); 1968 functions[MAIN_ID].reachable = true; 1969 1970 // Begin by doing a pass over the instructions to identify all the 1971 // functions. These start with a label and end with a ret. Note that 1972 // functions can have labels within them. 1973 FunctionID currentFunc = MAIN_ID; 1974 for(auto &inst : instruction) 1975 { 1976 switch (inst->opcode) 1977 { 1978 case OPCODE_LABEL: 1979 if (currentFunc == INVALID_ID) 1980 { 1981 // Start of a function. 1982 FunctionID id = inst->dst.label; 1983 ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else. 1984 functions[id] = FunctionInfo(); 1985 } 1986 break; 1987 case OPCODE_RET: 1988 currentFunc = INVALID_ID; 1989 break; 1990 default: 1991 break; 1992 } 1993 } 1994 1995 // Limits for the currently analyzed function. 1996 FunctionLimits currentLimits; 1997 1998 // Now loop over the instructions gathering the limits of each of the 1999 // functions. 2000 currentFunc = MAIN_ID; 2001 for(size_t i = 0; i < instruction.size(); i++) 2002 { 2003 const auto& inst = instruction[i]; 2004 switch (inst->opcode) 2005 { 2006 case OPCODE_LABEL: 2007 { 2008 maxLabel = std::max(maxLabel, inst->dst.label); 2009 if (currentFunc == INVALID_ID) 2010 { 2011 // Start of a function. 2012 FunctionID id = inst->dst.label; 2013 ASSERT(functions.find(id) != functions.end()); // Sanity check 2014 currentFunc = id; 2015 } 2016 break; 2017 } 2018 case OPCODE_CALL: 2019 case OPCODE_CALLNZ: 2020 { 2021 ASSERT(currentFunc != INVALID_ID); 2022 FunctionID id = inst->dst.label; 2023 ASSERT(functions.find(id) != functions.end()); 2024 functions[currentFunc].calls.emplace(id); 2025 functions[id].reachable = true; 2026 break; 2027 } 2028 case OPCODE_LOOP: 2029 case OPCODE_REP: 2030 case OPCODE_WHILE: 2031 case OPCODE_SWITCH: // Not a mistake - switches share loopReps. 2032 { 2033 ASSERT(currentFunc != INVALID_ID); 2034 ++currentLimits.loops; 2035 auto& func = functions[currentFunc]; 2036 func.limits.loops = std::max(func.limits.loops, currentLimits.loops); 2037 break; 2038 } 2039 case OPCODE_ENDLOOP: 2040 case OPCODE_ENDREP: 2041 case OPCODE_ENDWHILE: 2042 case OPCODE_ENDSWITCH: 2043 { 2044 ASSERT(currentLimits.loops > 0); 2045 --currentLimits.loops; 2046 break; 2047 } 2048 case OPCODE_IF: 2049 case OPCODE_IFC: 2050 { 2051 ASSERT(currentFunc != INVALID_ID); 2052 ++currentLimits.ifs; 2053 auto& func = functions[currentFunc]; 2054 func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs); 2055 break; 2056 } 2057 case OPCODE_ENDIF: 2058 { 2059 ASSERT(currentLimits.ifs > 0); 2060 currentLimits.ifs--; 2061 break; 2062 } 2063 case OPCODE_RET: 2064 { 2065 // Must be in a function to return. 2066 ASSERT(currentFunc != INVALID_ID); 2067 2068 // All stacks should be popped before returning. 2069 ASSERT(currentLimits.ifs == 0); 2070 ASSERT(currentLimits.loops == 0); 2071 2072 currentFunc = INVALID_ID; 2073 currentLimits = FunctionLimits(); 2074 break; 2075 } 2076 default: 2077 break; 2078 } 2079 } 2080 2081 #if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON) 2082 // Assert that every function is reachable (these should have been 2083 // stripped in earlier stages). Unreachable functions may be code 2084 // generated, but their own limits are not considered below, potentially 2085 // causing OOB indexing in later stages. 2086 // If we ever find cases where there are unreachable functions, we can 2087 // replace this assert with NO-OPing or stripping out the dead 2088 // functions. 2089 for (const auto &it : functions) { ASSERT(it.second.reachable); } 2090 #endif 2091 2092 // We have now gathered all the information about each of the functions 2093 // in the shader. Traverse these functions starting from the main 2094 // function to calculate the maximum limits across the entire shader. 2095 2096 std::unordered_set<FunctionID> visited; 2097 std::function<Limits(FunctionID)> traverse; 2098 traverse = [&](FunctionID id) -> Limits 2099 { 2100 const auto& func = functions[id]; 2101 ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed. 2102 visited.insert(id); 2103 Limits limits; 2104 limits.stack = 1; 2105 for (auto callee : func.calls) 2106 { 2107 auto calleeLimits = traverse(callee); 2108 limits.loops = std::max(limits.loops, calleeLimits.loops); 2109 limits.ifs = std::max(limits.ifs, calleeLimits.ifs); 2110 limits.stack = std::max(limits.stack, calleeLimits.stack + 1); 2111 } 2112 visited.erase(id); 2113 2114 limits.loops += func.limits.loops; 2115 limits.ifs += func.limits.ifs; 2116 return limits; 2117 }; 2118 2119 limits = traverse(MAIN_ID); 2120 limits.maxLabel = maxLabel; 2121 } 2122 } 2123