1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "OutputASM.h" 16 #include "Common/Math.hpp" 17 18 #include "common/debug.h" 19 #include "InfoSink.h" 20 21 #include "libGLESv2/Shader.h" 22 23 #include <GLES2/gl2.h> 24 #include <GLES2/gl2ext.h> 25 #include <GLES3/gl3.h> 26 #include <GL/glcorearb.h> 27 #include <GL/glext.h> 28 29 #include <stdlib.h> 30 31 namespace 32 { glVariableType(const TType & type)33 GLenum glVariableType(const TType &type) 34 { 35 switch(type.getBasicType()) 36 { 37 case EbtFloat: 38 if(type.isScalar()) 39 { 40 return GL_FLOAT; 41 } 42 else if(type.isVector()) 43 { 44 switch(type.getNominalSize()) 45 { 46 case 2: return GL_FLOAT_VEC2; 47 case 3: return GL_FLOAT_VEC3; 48 case 4: return GL_FLOAT_VEC4; 49 default: UNREACHABLE(type.getNominalSize()); 50 } 51 } 52 else if(type.isMatrix()) 53 { 54 switch(type.getNominalSize()) 55 { 56 case 2: 57 switch(type.getSecondarySize()) 58 { 59 case 2: return GL_FLOAT_MAT2; 60 case 3: return GL_FLOAT_MAT2x3; 61 case 4: return GL_FLOAT_MAT2x4; 62 default: UNREACHABLE(type.getSecondarySize()); 63 } 64 case 3: 65 switch(type.getSecondarySize()) 66 { 67 case 2: return GL_FLOAT_MAT3x2; 68 case 3: return GL_FLOAT_MAT3; 69 case 4: return GL_FLOAT_MAT3x4; 70 default: UNREACHABLE(type.getSecondarySize()); 71 } 72 case 4: 73 switch(type.getSecondarySize()) 74 { 75 case 2: return GL_FLOAT_MAT4x2; 76 case 3: return GL_FLOAT_MAT4x3; 77 case 4: return GL_FLOAT_MAT4; 78 default: UNREACHABLE(type.getSecondarySize()); 79 } 80 default: UNREACHABLE(type.getNominalSize()); 81 } 82 } 83 else UNREACHABLE(0); 84 break; 85 case EbtInt: 86 if(type.isScalar()) 87 { 88 return GL_INT; 89 } 90 else if(type.isVector()) 91 { 92 switch(type.getNominalSize()) 93 { 94 case 2: return GL_INT_VEC2; 95 case 3: return GL_INT_VEC3; 96 case 4: return GL_INT_VEC4; 97 default: UNREACHABLE(type.getNominalSize()); 98 } 99 } 100 else UNREACHABLE(0); 101 break; 102 case EbtUInt: 103 if(type.isScalar()) 104 { 105 return GL_UNSIGNED_INT; 106 } 107 else if(type.isVector()) 108 { 109 switch(type.getNominalSize()) 110 { 111 case 2: return GL_UNSIGNED_INT_VEC2; 112 case 3: return GL_UNSIGNED_INT_VEC3; 113 case 4: return GL_UNSIGNED_INT_VEC4; 114 default: UNREACHABLE(type.getNominalSize()); 115 } 116 } 117 else UNREACHABLE(0); 118 break; 119 case EbtBool: 120 if(type.isScalar()) 121 { 122 return GL_BOOL; 123 } 124 else if(type.isVector()) 125 { 126 switch(type.getNominalSize()) 127 { 128 case 2: return GL_BOOL_VEC2; 129 case 3: return GL_BOOL_VEC3; 130 case 4: return GL_BOOL_VEC4; 131 default: UNREACHABLE(type.getNominalSize()); 132 } 133 } 134 else UNREACHABLE(0); 135 break; 136 case EbtSampler2D: 137 return GL_SAMPLER_2D; 138 case EbtISampler2D: 139 return GL_INT_SAMPLER_2D; 140 case EbtUSampler2D: 141 return GL_UNSIGNED_INT_SAMPLER_2D; 142 case EbtSamplerCube: 143 return GL_SAMPLER_CUBE; 144 case EbtSampler2DRect: 145 return GL_SAMPLER_2D_RECT_ARB; 146 case EbtISamplerCube: 147 return GL_INT_SAMPLER_CUBE; 148 case EbtUSamplerCube: 149 return GL_UNSIGNED_INT_SAMPLER_CUBE; 150 case EbtSamplerExternalOES: 151 return GL_SAMPLER_EXTERNAL_OES; 152 case EbtSampler3D: 153 return GL_SAMPLER_3D_OES; 154 case EbtISampler3D: 155 return GL_INT_SAMPLER_3D; 156 case EbtUSampler3D: 157 return GL_UNSIGNED_INT_SAMPLER_3D; 158 case EbtSampler2DArray: 159 return GL_SAMPLER_2D_ARRAY; 160 case EbtISampler2DArray: 161 return GL_INT_SAMPLER_2D_ARRAY; 162 case EbtUSampler2DArray: 163 return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY; 164 case EbtSampler2DShadow: 165 return GL_SAMPLER_2D_SHADOW; 166 case EbtSamplerCubeShadow: 167 return GL_SAMPLER_CUBE_SHADOW; 168 case EbtSampler2DArrayShadow: 169 return GL_SAMPLER_2D_ARRAY_SHADOW; 170 default: 171 UNREACHABLE(type.getBasicType()); 172 break; 173 } 174 175 return GL_NONE; 176 } 177 glVariablePrecision(const TType & type)178 GLenum glVariablePrecision(const TType &type) 179 { 180 if(type.getBasicType() == EbtFloat) 181 { 182 switch(type.getPrecision()) 183 { 184 case EbpHigh: return GL_HIGH_FLOAT; 185 case EbpMedium: return GL_MEDIUM_FLOAT; 186 case EbpLow: return GL_LOW_FLOAT; 187 case EbpUndefined: 188 // Should be defined as the default precision by the parser 189 default: UNREACHABLE(type.getPrecision()); 190 } 191 } 192 else if(type.getBasicType() == EbtInt) 193 { 194 switch(type.getPrecision()) 195 { 196 case EbpHigh: return GL_HIGH_INT; 197 case EbpMedium: return GL_MEDIUM_INT; 198 case EbpLow: return GL_LOW_INT; 199 case EbpUndefined: 200 // Should be defined as the default precision by the parser 201 default: UNREACHABLE(type.getPrecision()); 202 } 203 } 204 205 // Other types (boolean, sampler) don't have a precision 206 return GL_NONE; 207 } 208 } 209 210 namespace glsl 211 { 212 // Integer to TString conversion str(int i)213 TString str(int i) 214 { 215 char buffer[20]; 216 sprintf(buffer, "%d", i); 217 return buffer; 218 } 219 220 class Temporary : public TIntermSymbol 221 { 222 public: Temporary(OutputASM * assembler)223 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler) 224 { 225 } 226 ~Temporary()227 ~Temporary() 228 { 229 assembler->freeTemporary(this); 230 } 231 232 private: 233 OutputASM *const assembler; 234 }; 235 236 class Constant : public TIntermConstantUnion 237 { 238 public: Constant(float x,float y,float z,float w)239 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false)) 240 { 241 constants[0].setFConst(x); 242 constants[1].setFConst(y); 243 constants[2].setFConst(z); 244 constants[3].setFConst(w); 245 } 246 Constant(bool b)247 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false)) 248 { 249 constants[0].setBConst(b); 250 } 251 Constant(int i)252 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false)) 253 { 254 constants[0].setIConst(i); 255 } 256 ~Constant()257 ~Constant() 258 { 259 } 260 261 private: 262 ConstantUnion constants[4]; 263 }; 264 ShaderVariable(const TType & type,const std::string & name,int registerIndex)265 ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) : 266 type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)), 267 name(name), arraySize(type.getArraySize()), registerIndex(registerIndex) 268 { 269 if(type.isStruct()) 270 { 271 for(const auto& field : type.getStruct()->fields()) 272 { 273 fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1)); 274 } 275 } 276 } 277 Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)278 Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) : 279 ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo) 280 { 281 } 282 UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)283 UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize, 284 TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) : 285 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout), 286 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId) 287 { 288 } 289 BlockLayoutEncoder()290 BlockLayoutEncoder::BlockLayoutEncoder() 291 : mCurrentOffset(0) 292 { 293 } 294 encodeType(const TType & type)295 BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type) 296 { 297 int arrayStride; 298 int matrixStride; 299 300 bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor; 301 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride); 302 303 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent), 304 static_cast<int>(arrayStride * BytesPerComponent), 305 static_cast<int>(matrixStride * BytesPerComponent), 306 (matrixStride > 0) && isRowMajor); 307 308 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride); 309 310 return memberInfo; 311 } 312 313 // static getBlockRegister(const BlockMemberInfo & info)314 size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info) 315 { 316 return (info.offset / BytesPerComponent) / ComponentsPerRegister; 317 } 318 319 // static getBlockRegisterElement(const BlockMemberInfo & info)320 size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info) 321 { 322 return (info.offset / BytesPerComponent) % ComponentsPerRegister; 323 } 324 nextRegister()325 void BlockLayoutEncoder::nextRegister() 326 { 327 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister); 328 } 329 Std140BlockEncoder()330 Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder() 331 { 332 } 333 enterAggregateType()334 void Std140BlockEncoder::enterAggregateType() 335 { 336 nextRegister(); 337 } 338 exitAggregateType()339 void Std140BlockEncoder::exitAggregateType() 340 { 341 nextRegister(); 342 } 343 getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)344 void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut) 345 { 346 size_t baseAlignment = 0; 347 int matrixStride = 0; 348 int arrayStride = 0; 349 350 if(type.isMatrix()) 351 { 352 baseAlignment = ComponentsPerRegister; 353 matrixStride = ComponentsPerRegister; 354 355 if(arraySize > 0) 356 { 357 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 358 arrayStride = ComponentsPerRegister * numRegisters; 359 } 360 } 361 else if(arraySize > 0) 362 { 363 baseAlignment = ComponentsPerRegister; 364 arrayStride = ComponentsPerRegister; 365 } 366 else 367 { 368 const size_t numComponents = type.getElementSize(); 369 baseAlignment = (numComponents == 3 ? 4u : numComponents); 370 } 371 372 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment); 373 374 *matrixStrideOut = matrixStride; 375 *arrayStrideOut = arrayStride; 376 } 377 advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)378 void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride) 379 { 380 if(arraySize > 0) 381 { 382 mCurrentOffset += arrayStride * arraySize; 383 } 384 else if(type.isMatrix()) 385 { 386 ASSERT(matrixStride == ComponentsPerRegister); 387 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 388 mCurrentOffset += ComponentsPerRegister * numRegisters; 389 } 390 else 391 { 392 mCurrentOffset += type.getElementSize(); 393 } 394 } 395 Attribute()396 Attribute::Attribute() 397 { 398 type = GL_NONE; 399 arraySize = 0; 400 registerIndex = 0; 401 } 402 Attribute(GLenum type,const std::string & name,int arraySize,int layoutLocation,int registerIndex)403 Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex) 404 { 405 this->type = type; 406 this->name = name; 407 this->arraySize = arraySize; 408 this->layoutLocation = layoutLocation; 409 this->registerIndex = registerIndex; 410 } 411 getPixelShader() const412 sw::PixelShader *Shader::getPixelShader() const 413 { 414 return nullptr; 415 } 416 getVertexShader() const417 sw::VertexShader *Shader::getVertexShader() const 418 { 419 return nullptr; 420 } 421 TextureFunction(const TString & nodeName)422 OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false) 423 { 424 TString name = TFunction::unmangleName(nodeName); 425 426 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect") 427 { 428 method = IMPLICIT; 429 } 430 else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj") 431 { 432 method = IMPLICIT; 433 proj = true; 434 } 435 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod") 436 { 437 method = LOD; 438 } 439 else if(name == "texture2DProjLod" || name == "textureProjLod") 440 { 441 method = LOD; 442 proj = true; 443 } 444 else if(name == "textureSize") 445 { 446 method = SIZE; 447 } 448 else if(name == "textureOffset") 449 { 450 method = IMPLICIT; 451 offset = true; 452 } 453 else if(name == "textureProjOffset") 454 { 455 method = IMPLICIT; 456 offset = true; 457 proj = true; 458 } 459 else if(name == "textureLodOffset") 460 { 461 method = LOD; 462 offset = true; 463 } 464 else if(name == "textureProjLodOffset") 465 { 466 method = LOD; 467 proj = true; 468 offset = true; 469 } 470 else if(name == "texelFetch") 471 { 472 method = FETCH; 473 } 474 else if(name == "texelFetchOffset") 475 { 476 method = FETCH; 477 offset = true; 478 } 479 else if(name == "textureGrad") 480 { 481 method = GRAD; 482 } 483 else if(name == "textureGradOffset") 484 { 485 method = GRAD; 486 offset = true; 487 } 488 else if(name == "textureProjGrad") 489 { 490 method = GRAD; 491 proj = true; 492 } 493 else if(name == "textureProjGradOffset") 494 { 495 method = GRAD; 496 proj = true; 497 offset = true; 498 } 499 else UNREACHABLE(0); 500 } 501 OutputASM(TParseContext & context,Shader * shaderObject)502 OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context) 503 { 504 shader = nullptr; 505 pixelShader = nullptr; 506 vertexShader = nullptr; 507 508 if(shaderObject) 509 { 510 shader = shaderObject->getShader(); 511 pixelShader = shaderObject->getPixelShader(); 512 vertexShader = shaderObject->getVertexShader(); 513 } 514 515 functionArray.push_back(Function(0, "main(", nullptr, nullptr)); 516 currentFunction = 0; 517 outputQualifier = EvqOutput; // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData 518 } 519 ~OutputASM()520 OutputASM::~OutputASM() 521 { 522 } 523 output()524 void OutputASM::output() 525 { 526 if(shader) 527 { 528 emitShader(GLOBAL); 529 530 if(functionArray.size() > 1) // Only call main() when there are other functions 531 { 532 Instruction *callMain = emit(sw::Shader::OPCODE_CALL); 533 callMain->dst.type = sw::Shader::PARAMETER_LABEL; 534 callMain->dst.index = 0; // main() 535 536 emit(sw::Shader::OPCODE_RET); 537 } 538 539 emitShader(FUNCTION); 540 } 541 } 542 emitShader(Scope scope)543 void OutputASM::emitShader(Scope scope) 544 { 545 emitScope = scope; 546 currentScope = GLOBAL; 547 mContext.getTreeRoot()->traverse(this); 548 } 549 freeTemporary(Temporary * temporary)550 void OutputASM::freeTemporary(Temporary *temporary) 551 { 552 free(temporaries, temporary); 553 } 554 getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const555 sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const 556 { 557 TBasicType baseType = in->getType().getBasicType(); 558 559 switch(op) 560 { 561 case sw::Shader::OPCODE_NEG: 562 switch(baseType) 563 { 564 case EbtInt: 565 case EbtUInt: 566 return sw::Shader::OPCODE_INEG; 567 case EbtFloat: 568 default: 569 return op; 570 } 571 case sw::Shader::OPCODE_ABS: 572 switch(baseType) 573 { 574 case EbtInt: 575 return sw::Shader::OPCODE_IABS; 576 case EbtFloat: 577 default: 578 return op; 579 } 580 case sw::Shader::OPCODE_SGN: 581 switch(baseType) 582 { 583 case EbtInt: 584 return sw::Shader::OPCODE_ISGN; 585 case EbtFloat: 586 default: 587 return op; 588 } 589 case sw::Shader::OPCODE_ADD: 590 switch(baseType) 591 { 592 case EbtInt: 593 case EbtUInt: 594 return sw::Shader::OPCODE_IADD; 595 case EbtFloat: 596 default: 597 return op; 598 } 599 case sw::Shader::OPCODE_SUB: 600 switch(baseType) 601 { 602 case EbtInt: 603 case EbtUInt: 604 return sw::Shader::OPCODE_ISUB; 605 case EbtFloat: 606 default: 607 return op; 608 } 609 case sw::Shader::OPCODE_MUL: 610 switch(baseType) 611 { 612 case EbtInt: 613 case EbtUInt: 614 return sw::Shader::OPCODE_IMUL; 615 case EbtFloat: 616 default: 617 return op; 618 } 619 case sw::Shader::OPCODE_DIV: 620 switch(baseType) 621 { 622 case EbtInt: 623 return sw::Shader::OPCODE_IDIV; 624 case EbtUInt: 625 return sw::Shader::OPCODE_UDIV; 626 case EbtFloat: 627 default: 628 return op; 629 } 630 case sw::Shader::OPCODE_IMOD: 631 return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op; 632 case sw::Shader::OPCODE_ISHR: 633 return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op; 634 case sw::Shader::OPCODE_MIN: 635 switch(baseType) 636 { 637 case EbtInt: 638 return sw::Shader::OPCODE_IMIN; 639 case EbtUInt: 640 return sw::Shader::OPCODE_UMIN; 641 case EbtFloat: 642 default: 643 return op; 644 } 645 case sw::Shader::OPCODE_MAX: 646 switch(baseType) 647 { 648 case EbtInt: 649 return sw::Shader::OPCODE_IMAX; 650 case EbtUInt: 651 return sw::Shader::OPCODE_UMAX; 652 case EbtFloat: 653 default: 654 return op; 655 } 656 default: 657 return op; 658 } 659 } 660 visitSymbol(TIntermSymbol * symbol)661 void OutputASM::visitSymbol(TIntermSymbol *symbol) 662 { 663 // The type of vertex outputs and fragment inputs with the same name must match (validated at link time), 664 // so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code). 665 switch(symbol->getQualifier()) 666 { 667 case EvqVaryingIn: 668 case EvqVaryingOut: 669 case EvqInvariantVaryingIn: 670 case EvqInvariantVaryingOut: 671 case EvqVertexOut: 672 case EvqFragmentIn: 673 if(symbol->getBasicType() != EbtInvariant) // Typeless declarations are not new varyings 674 { 675 declareVarying(symbol, -1); 676 } 677 break; 678 case EvqFragmentOut: 679 declareFragmentOutput(symbol); 680 break; 681 default: 682 break; 683 } 684 685 TInterfaceBlock* block = symbol->getType().getInterfaceBlock(); 686 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables: 687 // "All members of a named uniform block declared with a shared or std140 layout qualifier 688 // are considered active, even if they are not referenced in any shader in the program. 689 // The uniform block itself is also considered active, even if no member of the block is referenced." 690 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140))) 691 { 692 uniformRegister(symbol); 693 } 694 } 695 visitBinary(Visit visit,TIntermBinary * node)696 bool OutputASM::visitBinary(Visit visit, TIntermBinary *node) 697 { 698 if(currentScope != emitScope) 699 { 700 return false; 701 } 702 703 TIntermTyped *result = node; 704 TIntermTyped *left = node->getLeft(); 705 TIntermTyped *right = node->getRight(); 706 const TType &leftType = left->getType(); 707 const TType &rightType = right->getType(); 708 709 if(isSamplerRegister(result)) 710 { 711 return false; // Don't traverse, the register index is determined statically 712 } 713 714 switch(node->getOp()) 715 { 716 case EOpAssign: 717 assert(visit == PreVisit); 718 right->traverse(this); 719 assignLvalue(left, right); 720 copy(result, right); 721 return false; 722 case EOpInitialize: 723 assert(visit == PreVisit); 724 // Constant arrays go into the constant register file. 725 if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1) 726 { 727 for(int i = 0; i < left->totalRegisterCount(); i++) 728 { 729 emit(sw::Shader::OPCODE_DEF, left, i, right, i); 730 } 731 } 732 else 733 { 734 right->traverse(this); 735 copy(left, right); 736 } 737 return false; 738 case EOpMatrixTimesScalarAssign: 739 assert(visit == PreVisit); 740 right->traverse(this); 741 for(int i = 0; i < leftType.getNominalSize(); i++) 742 { 743 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right); 744 } 745 746 assignLvalue(left, result); 747 return false; 748 case EOpVectorTimesMatrixAssign: 749 assert(visit == PreVisit); 750 { 751 // The left operand may contain a swizzle serving double-duty as 752 // swizzle and writemask, so it's important that we traverse it 753 // first. Otherwise we may end up never setting up our left 754 // operand correctly. 755 left->traverse(this); 756 right->traverse(this); 757 int size = leftType.getNominalSize(); 758 759 for(int i = 0; i < size; i++) 760 { 761 Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i); 762 dot->dst.mask = 1 << i; 763 } 764 765 assignLvalue(left, result); 766 } 767 return false; 768 case EOpMatrixTimesMatrixAssign: 769 assert(visit == PreVisit); 770 { 771 right->traverse(this); 772 int dim = leftType.getNominalSize(); 773 774 for(int i = 0; i < dim; i++) 775 { 776 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 777 mul->src[1].swizzle = 0x00; 778 779 for(int j = 1; j < dim; j++) 780 { 781 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 782 mad->src[1].swizzle = j * 0x55; 783 } 784 } 785 786 assignLvalue(left, result); 787 } 788 return false; 789 case EOpIndexDirect: 790 case EOpIndexIndirect: 791 case EOpIndexDirectStruct: 792 case EOpIndexDirectInterfaceBlock: 793 assert(visit == PreVisit); 794 evaluateRvalue(node); 795 return false; 796 case EOpVectorSwizzle: 797 if(visit == PostVisit) 798 { 799 int swizzle = 0; 800 TIntermAggregate *components = right->getAsAggregate(); 801 802 if(components) 803 { 804 TIntermSequence &sequence = components->getSequence(); 805 int component = 0; 806 807 for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++) 808 { 809 TIntermConstantUnion *element = (*sit)->getAsConstantUnion(); 810 811 if(element) 812 { 813 int i = element->getUnionArrayPointer()[0].getIConst(); 814 swizzle |= i << (component * 2); 815 component++; 816 } 817 else UNREACHABLE(0); 818 } 819 } 820 else UNREACHABLE(0); 821 822 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left); 823 mov->src[0].swizzle = swizzle; 824 } 825 break; 826 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break; 827 case EOpAdd: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right); break; 828 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break; 829 case EOpSub: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right); break; 830 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break; 831 case EOpMul: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right); break; 832 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break; 833 case EOpDiv: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right); break; 834 case EOpIModAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break; 835 case EOpIMod: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right); break; 836 case EOpBitShiftLeftAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break; 837 case EOpBitShiftLeft: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right); break; 838 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break; 839 case EOpBitShiftRight: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right); break; 840 case EOpBitwiseAndAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break; 841 case EOpBitwiseAnd: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right); break; 842 case EOpBitwiseXorAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break; 843 case EOpBitwiseXor: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right); break; 844 case EOpBitwiseOrAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right); break; 845 case EOpBitwiseOr: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right); break; 846 case EOpEqual: 847 if(visit == PostVisit) 848 { 849 emitBinary(sw::Shader::OPCODE_EQ, result, left, right); 850 851 for(int index = 1; index < left->totalRegisterCount(); index++) 852 { 853 Temporary equal(this); 854 emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index); 855 emit(sw::Shader::OPCODE_AND, result, result, &equal); 856 } 857 } 858 break; 859 case EOpNotEqual: 860 if(visit == PostVisit) 861 { 862 emitBinary(sw::Shader::OPCODE_NE, result, left, right); 863 864 for(int index = 1; index < left->totalRegisterCount(); index++) 865 { 866 Temporary notEqual(this); 867 emit(sw::Shader::OPCODE_NE, ¬Equal, 0, left, index, right, index); 868 emit(sw::Shader::OPCODE_OR, result, result, ¬Equal); 869 } 870 } 871 break; 872 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break; 873 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break; 874 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break; 875 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break; 876 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break; 877 case EOpVectorTimesScalar: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break; 878 case EOpMatrixTimesScalar: 879 if(visit == PostVisit) 880 { 881 if(left->isMatrix()) 882 { 883 for(int i = 0; i < leftType.getNominalSize(); i++) 884 { 885 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0); 886 } 887 } 888 else if(right->isMatrix()) 889 { 890 for(int i = 0; i < rightType.getNominalSize(); i++) 891 { 892 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 893 } 894 } 895 else UNREACHABLE(0); 896 } 897 break; 898 case EOpVectorTimesMatrix: 899 if(visit == PostVisit) 900 { 901 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize()); 902 903 int size = rightType.getNominalSize(); 904 for(int i = 0; i < size; i++) 905 { 906 Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i); 907 dot->dst.mask = 1 << i; 908 } 909 } 910 break; 911 case EOpMatrixTimesVector: 912 if(visit == PostVisit) 913 { 914 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right); 915 mul->src[1].swizzle = 0x00; 916 917 int size = rightType.getNominalSize(); 918 for(int i = 1; i < size; i++) 919 { 920 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result); 921 mad->src[1].swizzle = i * 0x55; 922 } 923 } 924 break; 925 case EOpMatrixTimesMatrix: 926 if(visit == PostVisit) 927 { 928 int dim = leftType.getNominalSize(); 929 930 int size = rightType.getNominalSize(); 931 for(int i = 0; i < size; i++) 932 { 933 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 934 mul->src[1].swizzle = 0x00; 935 936 for(int j = 1; j < dim; j++) 937 { 938 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 939 mad->src[1].swizzle = j * 0x55; 940 } 941 } 942 } 943 break; 944 case EOpLogicalOr: 945 if(trivial(right, 6)) 946 { 947 if(visit == PostVisit) 948 { 949 emit(sw::Shader::OPCODE_OR, result, left, right); 950 } 951 } 952 else // Short-circuit evaluation 953 { 954 if(visit == InVisit) 955 { 956 emit(sw::Shader::OPCODE_MOV, result, left); 957 Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result); 958 ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT; 959 } 960 else if(visit == PostVisit) 961 { 962 emit(sw::Shader::OPCODE_MOV, result, right); 963 emit(sw::Shader::OPCODE_ENDIF); 964 } 965 } 966 break; 967 case EOpLogicalXor: if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break; 968 case EOpLogicalAnd: 969 if(trivial(right, 6)) 970 { 971 if(visit == PostVisit) 972 { 973 emit(sw::Shader::OPCODE_AND, result, left, right); 974 } 975 } 976 else // Short-circuit evaluation 977 { 978 if(visit == InVisit) 979 { 980 emit(sw::Shader::OPCODE_MOV, result, left); 981 emit(sw::Shader::OPCODE_IF, 0, result); 982 } 983 else if(visit == PostVisit) 984 { 985 emit(sw::Shader::OPCODE_MOV, result, right); 986 emit(sw::Shader::OPCODE_ENDIF); 987 } 988 } 989 break; 990 default: UNREACHABLE(node->getOp()); 991 } 992 993 return true; 994 } 995 emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)996 void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow) 997 { 998 switch(size) 999 { 1000 case 1: // Used for cofactor computation only 1001 { 1002 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1003 bool isMov = (row == col); 1004 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG; 1005 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row); 1006 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col); 1007 mov->dst.mask = 1 << outRow; 1008 } 1009 break; 1010 case 2: 1011 { 1012 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy 1013 1014 bool isCofactor = (col >= 0) && (row >= 0); 1015 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 1016 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 1017 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 1018 1019 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1); 1020 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2]; 1021 det->dst.mask = 1 << outRow; 1022 } 1023 break; 1024 case 3: 1025 { 1026 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw 1027 1028 bool isCofactor = (col >= 0) && (row >= 0); 1029 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 1030 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 1031 int col2 = (isCofactor && (col <= 2)) ? 3 : 2; 1032 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 1033 1034 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2); 1035 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3]; 1036 det->dst.mask = 1 << outRow; 1037 } 1038 break; 1039 case 4: 1040 { 1041 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3); 1042 det->dst.mask = 1 << outRow; 1043 } 1044 break; 1045 default: 1046 UNREACHABLE(size); 1047 break; 1048 } 1049 } 1050 visitUnary(Visit visit,TIntermUnary * node)1051 bool OutputASM::visitUnary(Visit visit, TIntermUnary *node) 1052 { 1053 if(currentScope != emitScope) 1054 { 1055 return false; 1056 } 1057 1058 TIntermTyped *result = node; 1059 TIntermTyped *arg = node->getOperand(); 1060 TBasicType basicType = arg->getType().getBasicType(); 1061 1062 union 1063 { 1064 float f; 1065 int i; 1066 } one_value; 1067 1068 if(basicType == EbtInt || basicType == EbtUInt) 1069 { 1070 one_value.i = 1; 1071 } 1072 else 1073 { 1074 one_value.f = 1.0f; 1075 } 1076 1077 Constant one(one_value.f, one_value.f, one_value.f, one_value.f); 1078 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f); 1079 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f); 1080 1081 switch(node->getOp()) 1082 { 1083 case EOpNegative: 1084 if(visit == PostVisit) 1085 { 1086 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg); 1087 for(int index = 0; index < arg->totalRegisterCount(); index++) 1088 { 1089 emit(negOpcode, result, index, arg, index); 1090 } 1091 } 1092 break; 1093 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1094 case EOpLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1095 case EOpBitwiseNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1096 case EOpPostIncrement: 1097 if(visit == PostVisit) 1098 { 1099 copy(result, arg); 1100 1101 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 1102 for(int index = 0; index < arg->totalRegisterCount(); index++) 1103 { 1104 emit(addOpcode, arg, index, arg, index, &one); 1105 } 1106 1107 assignLvalue(arg, arg); 1108 } 1109 break; 1110 case EOpPostDecrement: 1111 if(visit == PostVisit) 1112 { 1113 copy(result, arg); 1114 1115 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 1116 for(int index = 0; index < arg->totalRegisterCount(); index++) 1117 { 1118 emit(subOpcode, arg, index, arg, index, &one); 1119 } 1120 1121 assignLvalue(arg, arg); 1122 } 1123 break; 1124 case EOpPreIncrement: 1125 if(visit == PostVisit) 1126 { 1127 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 1128 for(int index = 0; index < arg->totalRegisterCount(); index++) 1129 { 1130 emit(addOpcode, result, index, arg, index, &one); 1131 } 1132 1133 assignLvalue(arg, result); 1134 } 1135 break; 1136 case EOpPreDecrement: 1137 if(visit == PostVisit) 1138 { 1139 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 1140 for(int index = 0; index < arg->totalRegisterCount(); index++) 1141 { 1142 emit(subOpcode, result, index, arg, index, &one); 1143 } 1144 1145 assignLvalue(arg, result); 1146 } 1147 break; 1148 case EOpRadians: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break; 1149 case EOpDegrees: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, °); break; 1150 case EOpSin: if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break; 1151 case EOpCos: if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break; 1152 case EOpTan: if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break; 1153 case EOpAsin: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break; 1154 case EOpAcos: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break; 1155 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break; 1156 case EOpSinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break; 1157 case EOpCosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break; 1158 case EOpTanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break; 1159 case EOpAsinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break; 1160 case EOpAcosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break; 1161 case EOpAtanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break; 1162 case EOpExp: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break; 1163 case EOpLog: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break; 1164 case EOpExp2: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break; 1165 case EOpLog2: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break; 1166 case EOpSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break; 1167 case EOpInverseSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break; 1168 case EOpAbs: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break; 1169 case EOpSign: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break; 1170 case EOpFloor: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break; 1171 case EOpTrunc: if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break; 1172 case EOpRound: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break; 1173 case EOpRoundEven: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break; 1174 case EOpCeil: if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break; 1175 case EOpFract: if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break; 1176 case EOpIsNan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break; 1177 case EOpIsInf: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break; 1178 case EOpLength: if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break; 1179 case EOpNormalize: if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break; 1180 case EOpDFdx: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break; 1181 case EOpDFdy: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break; 1182 case EOpFwidth: if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break; 1183 case EOpAny: if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break; 1184 case EOpAll: if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break; 1185 case EOpFloatBitsToInt: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break; 1186 case EOpFloatBitsToUint: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break; 1187 case EOpIntBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break; 1188 case EOpUintBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break; 1189 case EOpPackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break; 1190 case EOpPackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break; 1191 case EOpPackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break; 1192 case EOpUnpackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break; 1193 case EOpUnpackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break; 1194 case EOpUnpackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break; 1195 case EOpTranspose: 1196 if(visit == PostVisit) 1197 { 1198 int numCols = arg->getNominalSize(); 1199 int numRows = arg->getSecondarySize(); 1200 for(int i = 0; i < numCols; ++i) 1201 { 1202 for(int j = 0; j < numRows; ++j) 1203 { 1204 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i); 1205 mov->src[0].swizzle = 0x55 * j; 1206 mov->dst.mask = 1 << i; 1207 } 1208 } 1209 } 1210 break; 1211 case EOpDeterminant: 1212 if(visit == PostVisit) 1213 { 1214 int size = arg->getNominalSize(); 1215 ASSERT(size == arg->getSecondarySize()); 1216 1217 emitDeterminant(result, arg, size); 1218 } 1219 break; 1220 case EOpInverse: 1221 if(visit == PostVisit) 1222 { 1223 int size = arg->getNominalSize(); 1224 ASSERT(size == arg->getSecondarySize()); 1225 1226 // Compute transposed matrix of cofactors 1227 for(int i = 0; i < size; ++i) 1228 { 1229 for(int j = 0; j < size; ++j) 1230 { 1231 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1232 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant 1233 emitDeterminant(result, arg, size - 1, j, i, i, j); 1234 } 1235 } 1236 1237 // Compute 1 / determinant 1238 Temporary invDet(this); 1239 emitDeterminant(&invDet, arg, size); 1240 Constant one(1.0f, 1.0f, 1.0f, 1.0f); 1241 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet); 1242 div->src[1].swizzle = 0x00; // xxxx 1243 1244 // Divide transposed matrix of cofactors by determinant 1245 for(int i = 0; i < size; ++i) 1246 { 1247 emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet); 1248 } 1249 } 1250 break; 1251 default: UNREACHABLE(node->getOp()); 1252 } 1253 1254 return true; 1255 } 1256 visitAggregate(Visit visit,TIntermAggregate * node)1257 bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node) 1258 { 1259 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence) 1260 { 1261 return false; 1262 } 1263 1264 Constant zero(0.0f, 0.0f, 0.0f, 0.0f); 1265 1266 TIntermTyped *result = node; 1267 const TType &resultType = node->getType(); 1268 TIntermSequence &arg = node->getSequence(); 1269 size_t argumentCount = arg.size(); 1270 1271 switch(node->getOp()) 1272 { 1273 case EOpSequence: break; 1274 case EOpDeclaration: break; 1275 case EOpInvariantDeclaration: break; 1276 case EOpPrototype: break; 1277 case EOpComma: 1278 if(visit == PostVisit) 1279 { 1280 copy(result, arg[1]); 1281 } 1282 break; 1283 case EOpFunction: 1284 if(visit == PreVisit) 1285 { 1286 const TString &name = node->getName(); 1287 1288 if(emitScope == FUNCTION) 1289 { 1290 if(functionArray.size() > 1) // No need for a label when there's only main() 1291 { 1292 Instruction *label = emit(sw::Shader::OPCODE_LABEL); 1293 label->dst.type = sw::Shader::PARAMETER_LABEL; 1294 1295 const Function *function = findFunction(name); 1296 ASSERT(function); // Should have been added during global pass 1297 label->dst.index = function->label; 1298 currentFunction = function->label; 1299 } 1300 } 1301 else if(emitScope == GLOBAL) 1302 { 1303 if(name != "main(") 1304 { 1305 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence(); 1306 functionArray.push_back(Function(functionArray.size(), name, &arguments, node)); 1307 } 1308 } 1309 else UNREACHABLE(emitScope); 1310 1311 currentScope = FUNCTION; 1312 } 1313 else if(visit == PostVisit) 1314 { 1315 if(emitScope == FUNCTION) 1316 { 1317 if(functionArray.size() > 1) // No need to return when there's only main() 1318 { 1319 emit(sw::Shader::OPCODE_RET); 1320 } 1321 } 1322 1323 currentScope = GLOBAL; 1324 } 1325 break; 1326 case EOpFunctionCall: 1327 if(visit == PostVisit) 1328 { 1329 if(node->isUserDefined()) 1330 { 1331 const TString &name = node->getName(); 1332 const Function *function = findFunction(name); 1333 1334 if(!function) 1335 { 1336 mContext.error(node->getLine(), "function definition not found", name.c_str()); 1337 return false; 1338 } 1339 1340 TIntermSequence &arguments = *function->arg; 1341 1342 for(size_t i = 0; i < argumentCount; i++) 1343 { 1344 TIntermTyped *in = arguments[i]->getAsTyped(); 1345 1346 if(in->getQualifier() == EvqIn || 1347 in->getQualifier() == EvqInOut || 1348 in->getQualifier() == EvqConstReadOnly) 1349 { 1350 copy(in, arg[i]); 1351 } 1352 } 1353 1354 Instruction *call = emit(sw::Shader::OPCODE_CALL); 1355 call->dst.type = sw::Shader::PARAMETER_LABEL; 1356 call->dst.index = function->label; 1357 1358 if(function->ret && function->ret->getType().getBasicType() != EbtVoid) 1359 { 1360 copy(result, function->ret); 1361 } 1362 1363 for(size_t i = 0; i < argumentCount; i++) 1364 { 1365 TIntermTyped *argument = arguments[i]->getAsTyped(); 1366 TIntermTyped *out = arg[i]->getAsTyped(); 1367 1368 if(argument->getQualifier() == EvqOut || 1369 argument->getQualifier() == EvqInOut) 1370 { 1371 assignLvalue(out, argument); 1372 } 1373 } 1374 } 1375 else 1376 { 1377 const TextureFunction textureFunction(node->getName()); 1378 TIntermTyped *s = arg[0]->getAsTyped(); 1379 TIntermTyped *t = arg[1]->getAsTyped(); 1380 1381 Temporary coord(this); 1382 1383 if(textureFunction.proj) 1384 { 1385 Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]); 1386 rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1); 1387 rcp->dst.mask = 0x7; 1388 1389 Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord); 1390 mul->dst.mask = 0x7; 1391 1392 if(IsShadowSampler(s->getBasicType())) 1393 { 1394 ASSERT(s->getBasicType() == EbtSampler2DShadow); 1395 Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord); 1396 mov->src[0].swizzle = 0xA4; 1397 } 1398 } 1399 else 1400 { 1401 Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]); 1402 1403 if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3) 1404 { 1405 ASSERT(s->getBasicType() == EbtSampler2DShadow); 1406 mov->src[0].swizzle = 0xA4; 1407 } 1408 } 1409 1410 switch(textureFunction.method) 1411 { 1412 case TextureFunction::IMPLICIT: 1413 if(!textureFunction.offset) 1414 { 1415 if(argumentCount == 2) 1416 { 1417 emit(sw::Shader::OPCODE_TEX, result, &coord, s); 1418 } 1419 else if(argumentCount == 3) // Bias 1420 { 1421 emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]); 1422 } 1423 else UNREACHABLE(argumentCount); 1424 } 1425 else // Offset 1426 { 1427 if(argumentCount == 3) 1428 { 1429 emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]); 1430 } 1431 else if(argumentCount == 4) // Bias 1432 { 1433 emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]); 1434 } 1435 else UNREACHABLE(argumentCount); 1436 } 1437 break; 1438 case TextureFunction::LOD: 1439 if(!textureFunction.offset && argumentCount == 3) 1440 { 1441 emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]); 1442 } 1443 else if(argumentCount == 4) // Offset 1444 { 1445 emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]); 1446 } 1447 else UNREACHABLE(argumentCount); 1448 break; 1449 case TextureFunction::FETCH: 1450 if(!textureFunction.offset && argumentCount == 3) 1451 { 1452 emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]); 1453 } 1454 else if(argumentCount == 4) // Offset 1455 { 1456 emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]); 1457 } 1458 else UNREACHABLE(argumentCount); 1459 break; 1460 case TextureFunction::GRAD: 1461 if(!textureFunction.offset && argumentCount == 4) 1462 { 1463 emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]); 1464 } 1465 else if(argumentCount == 5) // Offset 1466 { 1467 emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]); 1468 } 1469 else UNREACHABLE(argumentCount); 1470 break; 1471 case TextureFunction::SIZE: 1472 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s); 1473 break; 1474 default: 1475 UNREACHABLE(textureFunction.method); 1476 } 1477 } 1478 } 1479 break; 1480 case EOpParameters: 1481 break; 1482 case EOpConstructFloat: 1483 case EOpConstructVec2: 1484 case EOpConstructVec3: 1485 case EOpConstructVec4: 1486 case EOpConstructBool: 1487 case EOpConstructBVec2: 1488 case EOpConstructBVec3: 1489 case EOpConstructBVec4: 1490 case EOpConstructInt: 1491 case EOpConstructIVec2: 1492 case EOpConstructIVec3: 1493 case EOpConstructIVec4: 1494 case EOpConstructUInt: 1495 case EOpConstructUVec2: 1496 case EOpConstructUVec3: 1497 case EOpConstructUVec4: 1498 if(visit == PostVisit) 1499 { 1500 int component = 0; 1501 int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0; 1502 int arrayComponents = result->getType().getElementSize(); 1503 for(size_t i = 0; i < argumentCount; i++) 1504 { 1505 TIntermTyped *argi = arg[i]->getAsTyped(); 1506 int size = argi->getNominalSize(); 1507 int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex); 1508 int swizzle = component - (arrayIndex * arrayComponents); 1509 1510 if(!argi->isMatrix()) 1511 { 1512 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1513 mov->dst.mask = (0xF << swizzle) & 0xF; 1514 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1515 1516 component += size; 1517 } 1518 else if(!result->isMatrix()) // Construct a non matrix from a matrix 1519 { 1520 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1521 mov->dst.mask = (0xF << swizzle) & 0xF; 1522 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1523 1524 // At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3 1525 if(result->getNominalSize() > size) 1526 { 1527 Instruction *mov = emitCast(result, arrayIndex, argi, 1); 1528 mov->dst.mask = (0xF << (swizzle + size)) & 0xF; 1529 // mat2: xxxy (0x40), mat3: xxxx (0x00) 1530 mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2); 1531 } 1532 1533 component += size; 1534 } 1535 else // Matrix 1536 { 1537 int column = 0; 1538 1539 while(component < resultType.getNominalSize()) 1540 { 1541 Instruction *mov = emitCast(result, arrayIndex, argi, column); 1542 mov->dst.mask = (0xF << swizzle) & 0xF; 1543 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1544 1545 column++; 1546 component += size; 1547 } 1548 } 1549 } 1550 } 1551 break; 1552 case EOpConstructMat2: 1553 case EOpConstructMat2x3: 1554 case EOpConstructMat2x4: 1555 case EOpConstructMat3x2: 1556 case EOpConstructMat3: 1557 case EOpConstructMat3x4: 1558 case EOpConstructMat4x2: 1559 case EOpConstructMat4x3: 1560 case EOpConstructMat4: 1561 if(visit == PostVisit) 1562 { 1563 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1564 const int outCols = result->getNominalSize(); 1565 const int outRows = result->getSecondarySize(); 1566 1567 if(arg0->isScalar() && arg.size() == 1) // Construct scale matrix 1568 { 1569 for(int i = 0; i < outCols; i++) 1570 { 1571 emit(sw::Shader::OPCODE_MOV, result, i, &zero); 1572 if (i < outRows) 1573 { 1574 // Insert the scalar value on the main diagonal. 1575 // For non-square matrices, Avoid emitting in 1576 // a column which doesn't /have/ a main diagonal 1577 // element, even though it would be fairly benign -- 1578 // it's not necessarily trivial for downstream 1579 // passes to see that this is redundant and strip it 1580 // out. 1581 Instruction *mov = emitCast(result, i, arg0, 0); 1582 mov->dst.mask = 1 << i; 1583 ASSERT(mov->src[0].swizzle == 0x00); 1584 } 1585 } 1586 } 1587 else if(arg0->isMatrix()) 1588 { 1589 int arraySize = result->isArray() ? result->getArraySize() : 1; 1590 1591 for(int n = 0; n < arraySize; n++) 1592 { 1593 TIntermTyped *argi = arg[n]->getAsTyped(); 1594 const int inCols = argi->getNominalSize(); 1595 const int inRows = argi->getSecondarySize(); 1596 1597 for(int i = 0; i < outCols; i++) 1598 { 1599 if(i >= inCols || outRows > inRows) 1600 { 1601 // Initialize to identity matrix 1602 Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f)); 1603 emitCast(result, i + n * outCols, &col, 0); 1604 } 1605 1606 if(i < inCols) 1607 { 1608 Instruction *mov = emitCast(result, i + n * outCols, argi, i); 1609 mov->dst.mask = 0xF >> (4 - inRows); 1610 } 1611 } 1612 } 1613 } 1614 else 1615 { 1616 int column = 0; 1617 int row = 0; 1618 1619 for(size_t i = 0; i < argumentCount; i++) 1620 { 1621 TIntermTyped *argi = arg[i]->getAsTyped(); 1622 int size = argi->getNominalSize(); 1623 int element = 0; 1624 1625 while(element < size) 1626 { 1627 Instruction *mov = emitCast(result, column, argi, 0); 1628 mov->dst.mask = (0xF << row) & 0xF; 1629 mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element; 1630 1631 int end = row + size - element; 1632 column = end >= outRows ? column + 1 : column; 1633 element = element + outRows - row; 1634 row = end >= outRows ? 0 : end; 1635 } 1636 } 1637 } 1638 } 1639 break; 1640 case EOpConstructStruct: 1641 if(visit == PostVisit) 1642 { 1643 int offset = 0; 1644 for(size_t i = 0; i < argumentCount; i++) 1645 { 1646 TIntermTyped *argi = arg[i]->getAsTyped(); 1647 int size = argi->totalRegisterCount(); 1648 1649 for(int index = 0; index < size; index++) 1650 { 1651 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index); 1652 mov->dst.mask = writeMask(result, offset + index); 1653 } 1654 1655 offset += size; 1656 } 1657 } 1658 break; 1659 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break; 1660 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break; 1661 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break; 1662 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break; 1663 case EOpVectorEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break; 1664 case EOpVectorNotEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break; 1665 case EOpMod: if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break; 1666 case EOpModf: 1667 if(visit == PostVisit) 1668 { 1669 TIntermTyped* arg1 = arg[1]->getAsTyped(); 1670 emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]); 1671 assignLvalue(arg1, arg1); 1672 emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1); 1673 } 1674 break; 1675 case EOpPow: if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break; 1676 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break; 1677 case EOpMin: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break; 1678 case EOpMax: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break; 1679 case EOpClamp: 1680 if(visit == PostVisit) 1681 { 1682 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); 1683 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]); 1684 } 1685 break; 1686 case EOpMix: 1687 if(visit == PostVisit) 1688 { 1689 if(arg[2]->getAsTyped()->getBasicType() == EbtBool) 1690 { 1691 emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]); 1692 } 1693 else 1694 { 1695 emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); 1696 } 1697 } 1698 break; 1699 case EOpStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break; 1700 case EOpSmoothStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break; 1701 case EOpDistance: if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break; 1702 case EOpDot: if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break; 1703 case EOpCross: if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break; 1704 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1705 case EOpReflect: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break; 1706 case EOpRefract: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1707 case EOpMul: 1708 if(visit == PostVisit) 1709 { 1710 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1711 ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) && 1712 (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize())); 1713 1714 int size = arg0->getNominalSize(); 1715 for(int i = 0; i < size; i++) 1716 { 1717 emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i); 1718 } 1719 } 1720 break; 1721 case EOpOuterProduct: 1722 if(visit == PostVisit) 1723 { 1724 for(int i = 0; i < dim(arg[1]); i++) 1725 { 1726 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]); 1727 mul->src[1].swizzle = 0x55 * i; 1728 } 1729 } 1730 break; 1731 default: UNREACHABLE(node->getOp()); 1732 } 1733 1734 return true; 1735 } 1736 visitSelection(Visit visit,TIntermSelection * node)1737 bool OutputASM::visitSelection(Visit visit, TIntermSelection *node) 1738 { 1739 if(currentScope != emitScope) 1740 { 1741 return false; 1742 } 1743 1744 TIntermTyped *condition = node->getCondition(); 1745 TIntermNode *trueBlock = node->getTrueBlock(); 1746 TIntermNode *falseBlock = node->getFalseBlock(); 1747 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 1748 1749 condition->traverse(this); 1750 1751 if(node->usesTernaryOperator()) 1752 { 1753 if(constantCondition) 1754 { 1755 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1756 1757 if(trueCondition) 1758 { 1759 trueBlock->traverse(this); 1760 copy(node, trueBlock); 1761 } 1762 else 1763 { 1764 falseBlock->traverse(this); 1765 copy(node, falseBlock); 1766 } 1767 } 1768 else if(trivial(node, 6)) // Fast to compute both potential results and no side effects 1769 { 1770 trueBlock->traverse(this); 1771 falseBlock->traverse(this); 1772 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock); 1773 } 1774 else 1775 { 1776 emit(sw::Shader::OPCODE_IF, 0, condition); 1777 1778 if(trueBlock) 1779 { 1780 trueBlock->traverse(this); 1781 copy(node, trueBlock); 1782 } 1783 1784 if(falseBlock) 1785 { 1786 emit(sw::Shader::OPCODE_ELSE); 1787 falseBlock->traverse(this); 1788 copy(node, falseBlock); 1789 } 1790 1791 emit(sw::Shader::OPCODE_ENDIF); 1792 } 1793 } 1794 else // if/else statement 1795 { 1796 if(constantCondition) 1797 { 1798 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1799 1800 if(trueCondition) 1801 { 1802 if(trueBlock) 1803 { 1804 trueBlock->traverse(this); 1805 } 1806 } 1807 else 1808 { 1809 if(falseBlock) 1810 { 1811 falseBlock->traverse(this); 1812 } 1813 } 1814 } 1815 else 1816 { 1817 emit(sw::Shader::OPCODE_IF, 0, condition); 1818 1819 if(trueBlock) 1820 { 1821 trueBlock->traverse(this); 1822 } 1823 1824 if(falseBlock) 1825 { 1826 emit(sw::Shader::OPCODE_ELSE); 1827 falseBlock->traverse(this); 1828 } 1829 1830 emit(sw::Shader::OPCODE_ENDIF); 1831 } 1832 } 1833 1834 return false; 1835 } 1836 visitLoop(Visit visit,TIntermLoop * node)1837 bool OutputASM::visitLoop(Visit visit, TIntermLoop *node) 1838 { 1839 if(currentScope != emitScope) 1840 { 1841 return false; 1842 } 1843 1844 LoopInfo loop(node); 1845 1846 if(loop.iterations == 0) 1847 { 1848 return false; 1849 } 1850 1851 bool unroll = (loop.iterations <= 4); 1852 1853 TIntermNode *init = node->getInit(); 1854 TIntermTyped *condition = node->getCondition(); 1855 TIntermTyped *expression = node->getExpression(); 1856 TIntermNode *body = node->getBody(); 1857 Constant True(true); 1858 1859 if(loop.isDeterministic()) 1860 { 1861 deterministicVariables.insert(loop.index->getId()); 1862 1863 if(!unroll) 1864 { 1865 emit(sw::Shader::OPCODE_SCALAR); // Unrolled loops don't have an ENDWHILE to disable scalar mode. 1866 } 1867 } 1868 1869 if(node->getType() == ELoopDoWhile) 1870 { 1871 Temporary iterate(this); 1872 emit(sw::Shader::OPCODE_MOV, &iterate, &True); 1873 1874 emit(sw::Shader::OPCODE_WHILE, 0, &iterate); // FIXME: Implement real do-while 1875 1876 if(body) 1877 { 1878 body->traverse(this); 1879 } 1880 1881 emit(sw::Shader::OPCODE_TEST); 1882 1883 condition->traverse(this); 1884 emit(sw::Shader::OPCODE_MOV, &iterate, condition); 1885 1886 emit(sw::Shader::OPCODE_ENDWHILE); 1887 } 1888 else 1889 { 1890 if(init) 1891 { 1892 init->traverse(this); 1893 } 1894 1895 if(unroll) 1896 { 1897 mContext.info(node->getLine(), "loop unrolled", "for"); 1898 1899 for(unsigned int i = 0; i < loop.iterations; i++) 1900 { 1901 // condition->traverse(this); // Condition could contain statements, but not in an unrollable loop 1902 1903 if(body) 1904 { 1905 body->traverse(this); 1906 } 1907 1908 if(expression) 1909 { 1910 expression->traverse(this); 1911 } 1912 } 1913 } 1914 else 1915 { 1916 if(condition) 1917 { 1918 condition->traverse(this); 1919 } 1920 else 1921 { 1922 condition = &True; 1923 } 1924 1925 emit(sw::Shader::OPCODE_WHILE, 0, condition); 1926 1927 if(body) 1928 { 1929 body->traverse(this); 1930 } 1931 1932 emit(sw::Shader::OPCODE_TEST); 1933 1934 if(loop.isDeterministic()) 1935 { 1936 emit(sw::Shader::OPCODE_SCALAR); 1937 } 1938 1939 if(expression) 1940 { 1941 expression->traverse(this); 1942 } 1943 1944 if(condition) 1945 { 1946 condition->traverse(this); 1947 } 1948 1949 emit(sw::Shader::OPCODE_ENDWHILE); 1950 } 1951 } 1952 1953 if(loop.isDeterministic()) 1954 { 1955 deterministicVariables.erase(loop.index->getId()); 1956 } 1957 1958 return false; 1959 } 1960 visitBranch(Visit visit,TIntermBranch * node)1961 bool OutputASM::visitBranch(Visit visit, TIntermBranch *node) 1962 { 1963 if(currentScope != emitScope) 1964 { 1965 return false; 1966 } 1967 1968 switch(node->getFlowOp()) 1969 { 1970 case EOpKill: if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD); break; 1971 case EOpBreak: if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK); break; 1972 case EOpContinue: if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break; 1973 case EOpReturn: 1974 if(visit == PostVisit) 1975 { 1976 TIntermTyped *value = node->getExpression(); 1977 1978 if(value) 1979 { 1980 copy(functionArray[currentFunction].ret, value); 1981 } 1982 1983 emit(sw::Shader::OPCODE_LEAVE); 1984 } 1985 break; 1986 default: UNREACHABLE(node->getFlowOp()); 1987 } 1988 1989 return true; 1990 } 1991 visitSwitch(Visit visit,TIntermSwitch * node)1992 bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node) 1993 { 1994 if(currentScope != emitScope) 1995 { 1996 return false; 1997 } 1998 1999 TIntermTyped* switchValue = node->getInit(); 2000 TIntermAggregate* opList = node->getStatementList(); 2001 2002 if(!switchValue || !opList) 2003 { 2004 return false; 2005 } 2006 2007 switchValue->traverse(this); 2008 2009 emit(sw::Shader::OPCODE_SWITCH); 2010 2011 TIntermSequence& sequence = opList->getSequence(); 2012 TIntermSequence::iterator it = sequence.begin(); 2013 TIntermSequence::iterator defaultIt = sequence.end(); 2014 int nbCases = 0; 2015 for(; it != sequence.end(); ++it) 2016 { 2017 TIntermCase* currentCase = (*it)->getAsCaseNode(); 2018 if(currentCase) 2019 { 2020 TIntermSequence::iterator caseIt = it; 2021 2022 TIntermTyped* condition = currentCase->getCondition(); 2023 if(condition) // non default case 2024 { 2025 if(nbCases != 0) 2026 { 2027 emit(sw::Shader::OPCODE_ELSE); 2028 } 2029 2030 condition->traverse(this); 2031 Temporary result(this); 2032 emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition); 2033 emit(sw::Shader::OPCODE_IF, 0, &result); 2034 nbCases++; 2035 2036 // Emit the code for this case and all subsequent cases until we hit a break statement. 2037 // TODO: This can repeat a lot of code for switches with many fall-through cases. 2038 for(++caseIt; caseIt != sequence.end(); ++caseIt) 2039 { 2040 (*caseIt)->traverse(this); 2041 2042 // Stop if we encounter an unconditional branch (break, continue, return, or kill). 2043 // TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}). 2044 // Note that this eliminates useless operations but shouldn't affect correctness. 2045 if((*caseIt)->getAsBranchNode()) 2046 { 2047 break; 2048 } 2049 } 2050 } 2051 else 2052 { 2053 defaultIt = it; // The default case might not be the last case, keep it for last 2054 } 2055 } 2056 } 2057 2058 // If there's a default case, traverse it here 2059 if(defaultIt != sequence.end()) 2060 { 2061 emit(sw::Shader::OPCODE_ELSE); 2062 for(++defaultIt; defaultIt != sequence.end(); ++defaultIt) 2063 { 2064 (*defaultIt)->traverse(this); 2065 if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return 2066 { 2067 break; 2068 } 2069 } 2070 } 2071 2072 for(int i = 0; i < nbCases; ++i) 2073 { 2074 emit(sw::Shader::OPCODE_ENDIF); 2075 } 2076 2077 emit(sw::Shader::OPCODE_ENDSWITCH); 2078 2079 return false; 2080 } 2081 emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)2082 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4) 2083 { 2084 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0); 2085 } 2086 emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2087 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1, 2088 TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4) 2089 { 2090 Instruction *instruction = new Instruction(op); 2091 2092 if(dst) 2093 { 2094 destination(instruction->dst, dst, dstIndex); 2095 } 2096 2097 if(src0) 2098 { 2099 TIntermTyped* src = src0->getAsTyped(); 2100 instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow); 2101 } 2102 2103 source(instruction->src[0], src0, index0); 2104 source(instruction->src[1], src1, index1); 2105 source(instruction->src[2], src2, index2); 2106 source(instruction->src[3], src3, index3); 2107 source(instruction->src[4], src4, index4); 2108 2109 shader->append(instruction); 2110 2111 return instruction; 2112 } 2113 emitCast(TIntermTyped * dst,TIntermTyped * src)2114 Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src) 2115 { 2116 return emitCast(dst, 0, src, 0); 2117 } 2118 emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2119 Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex) 2120 { 2121 switch(src->getBasicType()) 2122 { 2123 case EbtBool: 2124 switch(dst->getBasicType()) 2125 { 2126 case EbtInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 2127 case EbtUInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 2128 case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex); 2129 default: break; 2130 } 2131 break; 2132 case EbtInt: 2133 switch(dst->getBasicType()) 2134 { 2135 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 2136 case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex); 2137 default: break; 2138 } 2139 break; 2140 case EbtUInt: 2141 switch(dst->getBasicType()) 2142 { 2143 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 2144 case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex); 2145 default: break; 2146 } 2147 break; 2148 case EbtFloat: 2149 switch(dst->getBasicType()) 2150 { 2151 case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex); 2152 case EbtInt: return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex); 2153 case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex); 2154 default: break; 2155 } 2156 break; 2157 default: 2158 break; 2159 } 2160 2161 ASSERT((src->getBasicType() == dst->getBasicType()) || 2162 ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) || 2163 ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt))); 2164 2165 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex); 2166 } 2167 emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2168 void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2) 2169 { 2170 for(int index = 0; index < dst->elementRegisterCount(); index++) 2171 { 2172 emit(op, dst, index, src0, index, src1, index, src2, index); 2173 } 2174 } 2175 emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)2176 void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1) 2177 { 2178 emitBinary(op, result, src0, src1); 2179 assignLvalue(lhs, result); 2180 } 2181 emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2182 void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index) 2183 { 2184 sw::Shader::Opcode opcode; 2185 switch(left->getAsTyped()->getBasicType()) 2186 { 2187 case EbtBool: 2188 case EbtInt: 2189 opcode = sw::Shader::OPCODE_ICMP; 2190 break; 2191 case EbtUInt: 2192 opcode = sw::Shader::OPCODE_UCMP; 2193 break; 2194 default: 2195 opcode = sw::Shader::OPCODE_CMP; 2196 break; 2197 } 2198 2199 Instruction *cmp = emit(opcode, dst, 0, left, index, right, index); 2200 cmp->control = cmpOp; 2201 } 2202 componentCount(const TType & type,int registers)2203 int componentCount(const TType &type, int registers) 2204 { 2205 if(registers == 0) 2206 { 2207 return 0; 2208 } 2209 2210 if(type.isArray() && registers >= type.elementRegisterCount()) 2211 { 2212 int index = registers / type.elementRegisterCount(); 2213 registers -= index * type.elementRegisterCount(); 2214 return index * type.getElementSize() + componentCount(type, registers); 2215 } 2216 2217 if(type.isStruct() || type.isInterfaceBlock()) 2218 { 2219 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2220 int elements = 0; 2221 2222 for(const auto &field : fields) 2223 { 2224 const TType &fieldType = *(field->type()); 2225 2226 if(fieldType.totalRegisterCount() <= registers) 2227 { 2228 registers -= fieldType.totalRegisterCount(); 2229 elements += fieldType.getObjectSize(); 2230 } 2231 else // Register within this field 2232 { 2233 return elements + componentCount(fieldType, registers); 2234 } 2235 } 2236 } 2237 else if(type.isMatrix()) 2238 { 2239 return registers * type.registerSize(); 2240 } 2241 2242 UNREACHABLE(0); 2243 return 0; 2244 } 2245 registerSize(const TType & type,int registers)2246 int registerSize(const TType &type, int registers) 2247 { 2248 if(registers == 0) 2249 { 2250 if(type.isStruct()) 2251 { 2252 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0); 2253 } 2254 else if(type.isInterfaceBlock()) 2255 { 2256 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0); 2257 } 2258 2259 return type.registerSize(); 2260 } 2261 2262 if(type.isArray() && registers >= type.elementRegisterCount()) 2263 { 2264 int index = registers / type.elementRegisterCount(); 2265 registers -= index * type.elementRegisterCount(); 2266 return registerSize(type, registers); 2267 } 2268 2269 if(type.isStruct() || type.isInterfaceBlock()) 2270 { 2271 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2272 int elements = 0; 2273 2274 for(const auto &field : fields) 2275 { 2276 const TType &fieldType = *(field->type()); 2277 2278 if(fieldType.totalRegisterCount() <= registers) 2279 { 2280 registers -= fieldType.totalRegisterCount(); 2281 elements += fieldType.getObjectSize(); 2282 } 2283 else // Register within this field 2284 { 2285 return registerSize(fieldType, registers); 2286 } 2287 } 2288 } 2289 else if(type.isMatrix()) 2290 { 2291 return registerSize(type, 0); 2292 } 2293 2294 UNREACHABLE(0); 2295 return 0; 2296 } 2297 getBlockId(TIntermTyped * arg)2298 int OutputASM::getBlockId(TIntermTyped *arg) 2299 { 2300 if(arg) 2301 { 2302 const TType &type = arg->getType(); 2303 TInterfaceBlock* block = type.getInterfaceBlock(); 2304 if(block && (type.getQualifier() == EvqUniform)) 2305 { 2306 // Make sure the uniform block is declared 2307 uniformRegister(arg); 2308 2309 const char* blockName = block->name().c_str(); 2310 2311 // Fetch uniform block index from array of blocks 2312 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it) 2313 { 2314 if(blockName == it->name) 2315 { 2316 return it->blockId; 2317 } 2318 } 2319 2320 ASSERT(false); 2321 } 2322 } 2323 2324 return -1; 2325 } 2326 getArgumentInfo(TIntermTyped * arg,int index)2327 OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index) 2328 { 2329 const TType &type = arg->getType(); 2330 int blockId = getBlockId(arg); 2331 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1); 2332 if(blockId != -1) 2333 { 2334 argumentInfo.bufferIndex = 0; 2335 for(int i = 0; i < blockId; ++i) 2336 { 2337 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize; 2338 argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1; 2339 } 2340 2341 const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId]; 2342 2343 BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end(); 2344 BlockDefinitionIndexMap::const_iterator it = itEnd; 2345 2346 argumentInfo.clampedIndex = index; 2347 if(type.isInterfaceBlock()) 2348 { 2349 // Offset index to the beginning of the selected instance 2350 int blockRegisters = type.elementRegisterCount(); 2351 int bufferOffset = argumentInfo.clampedIndex / blockRegisters; 2352 argumentInfo.bufferIndex += bufferOffset; 2353 argumentInfo.clampedIndex -= bufferOffset * blockRegisters; 2354 } 2355 2356 int regIndex = registerIndex(arg); 2357 for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i) 2358 { 2359 it = blockDefinition.find(i); 2360 if(it != itEnd) 2361 { 2362 argumentInfo.clampedIndex -= (i - regIndex); 2363 break; 2364 } 2365 } 2366 ASSERT(it != itEnd); 2367 2368 argumentInfo.typedMemberInfo = it->second; 2369 2370 int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount(); 2371 argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex; 2372 } 2373 else 2374 { 2375 argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index; 2376 } 2377 2378 return argumentInfo; 2379 } 2380 source(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2381 void OutputASM::source(sw::Shader::SourceParameter ¶meter, TIntermNode *argument, int index) 2382 { 2383 if(argument) 2384 { 2385 TIntermTyped *arg = argument->getAsTyped(); 2386 Temporary unpackedUniform(this); 2387 2388 const TType& srcType = arg->getType(); 2389 TInterfaceBlock* srcBlock = srcType.getInterfaceBlock(); 2390 if(srcBlock && (srcType.getQualifier() == EvqUniform)) 2391 { 2392 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2393 const TType &memberType = argumentInfo.typedMemberInfo.type; 2394 2395 if(memberType.getBasicType() == EbtBool) 2396 { 2397 ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize 2398 2399 // Convert the packed bool, which is currently an int, to a true bool 2400 Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B); 2401 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2402 instruction->dst.index = registerIndex(&unpackedUniform); 2403 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2404 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2405 instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride; 2406 2407 shader->append(instruction); 2408 2409 arg = &unpackedUniform; 2410 index = 0; 2411 } 2412 else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix()) 2413 { 2414 int numCols = memberType.getNominalSize(); 2415 int numRows = memberType.getSecondarySize(); 2416 2417 ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize 2418 2419 unsigned int dstIndex = registerIndex(&unpackedUniform); 2420 unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55; 2421 int arrayIndex = argumentInfo.clampedIndex / numCols; 2422 int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride; 2423 2424 for(int j = 0; j < numRows; ++j) 2425 { 2426 // Transpose the row major matrix 2427 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV); 2428 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2429 instruction->dst.index = dstIndex; 2430 instruction->dst.mask = 1 << j; 2431 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2432 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2433 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride; 2434 instruction->src[0].swizzle = srcSwizzle; 2435 2436 shader->append(instruction); 2437 } 2438 2439 arg = &unpackedUniform; 2440 index = 0; 2441 } 2442 } 2443 2444 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2445 const TType &type = argumentInfo.typedMemberInfo.type; 2446 2447 int size = registerSize(type, argumentInfo.clampedIndex); 2448 2449 parameter.type = registerType(arg); 2450 parameter.bufferIndex = argumentInfo.bufferIndex; 2451 2452 if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer()) 2453 { 2454 int component = componentCount(type, argumentInfo.clampedIndex); 2455 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer(); 2456 2457 for(int i = 0; i < 4; i++) 2458 { 2459 if(size == 1) // Replicate 2460 { 2461 parameter.value[i] = constants[component + 0].getAsFloat(); 2462 } 2463 else if(i < size) 2464 { 2465 parameter.value[i] = constants[component + i].getAsFloat(); 2466 } 2467 else 2468 { 2469 parameter.value[i] = 0.0f; 2470 } 2471 } 2472 } 2473 else 2474 { 2475 parameter.index = registerIndex(arg) + argumentInfo.clampedIndex; 2476 2477 if(parameter.bufferIndex != -1) 2478 { 2479 int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride; 2480 parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride; 2481 } 2482 } 2483 2484 if(!IsSampler(arg->getBasicType())) 2485 { 2486 parameter.swizzle = readSwizzle(arg, size); 2487 } 2488 } 2489 } 2490 destination(sw::Shader::DestinationParameter & parameter,TIntermTyped * arg,int index)2491 void OutputASM::destination(sw::Shader::DestinationParameter ¶meter, TIntermTyped *arg, int index) 2492 { 2493 parameter.type = registerType(arg); 2494 parameter.index = registerIndex(arg) + index; 2495 parameter.mask = writeMask(arg, index); 2496 } 2497 copy(TIntermTyped * dst,TIntermNode * src,int offset)2498 void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset) 2499 { 2500 for(int index = 0; index < dst->totalRegisterCount(); index++) 2501 { 2502 Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index); 2503 } 2504 } 2505 swizzleElement(int swizzle,int index)2506 int swizzleElement(int swizzle, int index) 2507 { 2508 return (swizzle >> (index * 2)) & 0x03; 2509 } 2510 swizzleSwizzle(int leftSwizzle,int rightSwizzle)2511 int swizzleSwizzle(int leftSwizzle, int rightSwizzle) 2512 { 2513 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) | 2514 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) | 2515 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) | 2516 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6); 2517 } 2518 assignLvalue(TIntermTyped * dst,TIntermTyped * src)2519 void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src) 2520 { 2521 if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) || 2522 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))) 2523 { 2524 return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix"); 2525 } 2526 2527 TIntermBinary *binary = dst->getAsBinaryNode(); 2528 2529 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar()) 2530 { 2531 Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT); 2532 2533 lvalue(insert->dst, dst); 2534 2535 insert->src[0].type = insert->dst.type; 2536 insert->src[0].index = insert->dst.index; 2537 insert->src[0].rel = insert->dst.rel; 2538 source(insert->src[1], src); 2539 source(insert->src[2], binary->getRight()); 2540 2541 shader->append(insert); 2542 } 2543 else 2544 { 2545 Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV); 2546 2547 int swizzle = lvalue(mov1->dst, dst); 2548 2549 source(mov1->src[0], src); 2550 mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle); 2551 2552 shader->append(mov1); 2553 2554 for(int offset = 1; offset < dst->totalRegisterCount(); offset++) 2555 { 2556 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV); 2557 2558 mov->dst = mov1->dst; 2559 mov->dst.index += offset; 2560 mov->dst.mask = writeMask(dst, offset); 2561 2562 source(mov->src[0], src, offset); 2563 2564 shader->append(mov); 2565 } 2566 } 2567 } 2568 evaluateRvalue(TIntermTyped * node)2569 void OutputASM::evaluateRvalue(TIntermTyped *node) 2570 { 2571 TIntermBinary *binary = node->getAsBinaryNode(); 2572 2573 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar()) 2574 { 2575 Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT); 2576 2577 destination(insert->dst, node); 2578 2579 Temporary address(this); 2580 unsigned char mask; 2581 TIntermTyped *root = nullptr; 2582 unsigned int offset = 0; 2583 int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node); 2584 2585 source(insert->src[0], root, offset); 2586 insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle); 2587 2588 source(insert->src[1], binary->getRight()); 2589 2590 shader->append(insert); 2591 } 2592 else 2593 { 2594 Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV); 2595 2596 destination(mov1->dst, node, 0); 2597 2598 Temporary address(this); 2599 unsigned char mask; 2600 TIntermTyped *root = nullptr; 2601 unsigned int offset = 0; 2602 int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node); 2603 2604 source(mov1->src[0], root, offset); 2605 mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle); 2606 2607 shader->append(mov1); 2608 2609 for(int i = 1; i < node->totalRegisterCount(); i++) 2610 { 2611 Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i); 2612 mov->src[0].rel = mov1->src[0].rel; 2613 } 2614 } 2615 } 2616 lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2617 int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node) 2618 { 2619 Temporary address(this); 2620 TIntermTyped *root = nullptr; 2621 unsigned int offset = 0; 2622 unsigned char mask = 0xF; 2623 int swizzle = lvalue(root, offset, dst.rel, mask, address, node); 2624 2625 dst.type = registerType(root); 2626 dst.index = registerIndex(root) + offset; 2627 dst.mask = mask; 2628 2629 return swizzle; 2630 } 2631 lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2632 int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node) 2633 { 2634 TIntermTyped *result = node; 2635 TIntermBinary *binary = node->getAsBinaryNode(); 2636 TIntermSymbol *symbol = node->getAsSymbolNode(); 2637 2638 if(binary) 2639 { 2640 TIntermTyped *left = binary->getLeft(); 2641 TIntermTyped *right = binary->getRight(); 2642 2643 int leftSwizzle = lvalue(root, offset, rel, mask, address, left); // Resolve the l-value of the left side 2644 2645 switch(binary->getOp()) 2646 { 2647 case EOpIndexDirect: 2648 { 2649 int rightIndex = right->getAsConstantUnion()->getIConst(0); 2650 2651 if(left->isRegister()) 2652 { 2653 int leftMask = mask; 2654 2655 mask = 1; 2656 while((leftMask & mask) == 0) 2657 { 2658 mask = mask << 1; 2659 } 2660 2661 int element = swizzleElement(leftSwizzle, rightIndex); 2662 mask = 1 << element; 2663 2664 return element; 2665 } 2666 else if(left->isArray() || left->isMatrix()) 2667 { 2668 offset += rightIndex * result->totalRegisterCount(); 2669 return 0xE4; 2670 } 2671 else UNREACHABLE(0); 2672 } 2673 break; 2674 case EOpIndexIndirect: 2675 { 2676 right->traverse(this); 2677 2678 if(left->isRegister()) 2679 { 2680 // Requires INSERT instruction (handled by calling function) 2681 } 2682 else if(left->isArray() || left->isMatrix()) 2683 { 2684 int scale = result->totalRegisterCount(); 2685 2686 if(rel.type == sw::Shader::PARAMETER_VOID) // Use the index register as the relative address directly 2687 { 2688 if(left->totalRegisterCount() > 1) 2689 { 2690 sw::Shader::SourceParameter relativeRegister; 2691 source(relativeRegister, right); 2692 2693 int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0; 2694 2695 rel.index = relativeRegister.index; 2696 rel.type = relativeRegister.type; 2697 rel.scale = scale; 2698 rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0); 2699 } 2700 } 2701 else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register 2702 { 2703 if(scale == 1) 2704 { 2705 Constant oldScale((int)rel.scale); 2706 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right); 2707 mad->src[0].index = rel.index; 2708 mad->src[0].type = rel.type; 2709 } 2710 else 2711 { 2712 Constant oldScale((int)rel.scale); 2713 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale); 2714 mul->src[0].index = rel.index; 2715 mul->src[0].type = rel.type; 2716 2717 Constant newScale(scale); 2718 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2719 } 2720 2721 rel.type = sw::Shader::PARAMETER_TEMP; 2722 rel.index = registerIndex(&address); 2723 rel.scale = 1; 2724 } 2725 else // Just add the new index to the address register 2726 { 2727 if(scale == 1) 2728 { 2729 emit(sw::Shader::OPCODE_IADD, &address, &address, right); 2730 } 2731 else 2732 { 2733 Constant newScale(scale); 2734 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2735 } 2736 } 2737 } 2738 else UNREACHABLE(0); 2739 } 2740 break; 2741 case EOpIndexDirectStruct: 2742 case EOpIndexDirectInterfaceBlock: 2743 { 2744 const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ? 2745 left->getType().getStruct()->fields() : 2746 left->getType().getInterfaceBlock()->fields(); 2747 int index = right->getAsConstantUnion()->getIConst(0); 2748 int fieldOffset = 0; 2749 2750 for(int i = 0; i < index; i++) 2751 { 2752 fieldOffset += fields[i]->type()->totalRegisterCount(); 2753 } 2754 2755 offset += fieldOffset; 2756 mask = writeMask(result); 2757 2758 return 0xE4; 2759 } 2760 break; 2761 case EOpVectorSwizzle: 2762 { 2763 ASSERT(left->isRegister()); 2764 2765 int leftMask = mask; 2766 2767 int swizzle = 0; 2768 int rightMask = 0; 2769 2770 TIntermSequence &sequence = right->getAsAggregate()->getSequence(); 2771 2772 for(unsigned int i = 0; i < sequence.size(); i++) 2773 { 2774 int index = sequence[i]->getAsConstantUnion()->getIConst(0); 2775 2776 int element = swizzleElement(leftSwizzle, index); 2777 rightMask = rightMask | (1 << element); 2778 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2); 2779 } 2780 2781 mask = leftMask & rightMask; 2782 2783 return swizzle; 2784 } 2785 break; 2786 default: 2787 UNREACHABLE(binary->getOp()); // Not an l-value operator 2788 break; 2789 } 2790 } 2791 else if(symbol) 2792 { 2793 root = symbol; 2794 offset = 0; 2795 mask = writeMask(symbol); 2796 2797 return 0xE4; 2798 } 2799 else 2800 { 2801 node->traverse(this); 2802 2803 root = node; 2804 offset = 0; 2805 mask = writeMask(node); 2806 2807 return 0xE4; 2808 } 2809 2810 return 0xE4; 2811 } 2812 registerType(TIntermTyped * operand)2813 sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand) 2814 { 2815 if(isSamplerRegister(operand)) 2816 { 2817 return sw::Shader::PARAMETER_SAMPLER; 2818 } 2819 2820 const TQualifier qualifier = operand->getQualifier(); 2821 if((qualifier == EvqFragColor) || (qualifier == EvqFragData)) 2822 { 2823 if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) || 2824 ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData))) 2825 { 2826 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", ""); 2827 } 2828 outputQualifier = qualifier; 2829 } 2830 2831 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer())) 2832 { 2833 // Constant arrays are in the constant register file. 2834 if(operand->isArray() && operand->getArraySize() > 1) 2835 { 2836 return sw::Shader::PARAMETER_CONST; 2837 } 2838 else 2839 { 2840 return sw::Shader::PARAMETER_TEMP; 2841 } 2842 } 2843 2844 switch(qualifier) 2845 { 2846 case EvqTemporary: return sw::Shader::PARAMETER_TEMP; 2847 case EvqGlobal: return sw::Shader::PARAMETER_TEMP; 2848 case EvqConstExpr: return sw::Shader::PARAMETER_FLOAT4LITERAL; // All converted to float 2849 case EvqAttribute: return sw::Shader::PARAMETER_INPUT; 2850 case EvqVaryingIn: return sw::Shader::PARAMETER_INPUT; 2851 case EvqVaryingOut: return sw::Shader::PARAMETER_OUTPUT; 2852 case EvqVertexIn: return sw::Shader::PARAMETER_INPUT; 2853 case EvqFragmentOut: return sw::Shader::PARAMETER_COLOROUT; 2854 case EvqVertexOut: return sw::Shader::PARAMETER_OUTPUT; 2855 case EvqFragmentIn: return sw::Shader::PARAMETER_INPUT; 2856 case EvqInvariantVaryingIn: return sw::Shader::PARAMETER_INPUT; // FIXME: Guarantee invariance at the backend 2857 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT; // FIXME: Guarantee invariance at the backend 2858 case EvqSmooth: return sw::Shader::PARAMETER_OUTPUT; 2859 case EvqFlat: return sw::Shader::PARAMETER_OUTPUT; 2860 case EvqCentroidOut: return sw::Shader::PARAMETER_OUTPUT; 2861 case EvqSmoothIn: return sw::Shader::PARAMETER_INPUT; 2862 case EvqFlatIn: return sw::Shader::PARAMETER_INPUT; 2863 case EvqCentroidIn: return sw::Shader::PARAMETER_INPUT; 2864 case EvqUniform: return sw::Shader::PARAMETER_CONST; 2865 case EvqIn: return sw::Shader::PARAMETER_TEMP; 2866 case EvqOut: return sw::Shader::PARAMETER_TEMP; 2867 case EvqInOut: return sw::Shader::PARAMETER_TEMP; 2868 case EvqConstReadOnly: return sw::Shader::PARAMETER_TEMP; 2869 case EvqPosition: return sw::Shader::PARAMETER_OUTPUT; 2870 case EvqPointSize: return sw::Shader::PARAMETER_OUTPUT; 2871 case EvqInstanceID: return sw::Shader::PARAMETER_MISCTYPE; 2872 case EvqVertexID: return sw::Shader::PARAMETER_MISCTYPE; 2873 case EvqFragCoord: return sw::Shader::PARAMETER_MISCTYPE; 2874 case EvqFrontFacing: return sw::Shader::PARAMETER_MISCTYPE; 2875 case EvqPointCoord: return sw::Shader::PARAMETER_INPUT; 2876 case EvqFragColor: return sw::Shader::PARAMETER_COLOROUT; 2877 case EvqFragData: return sw::Shader::PARAMETER_COLOROUT; 2878 case EvqFragDepth: return sw::Shader::PARAMETER_DEPTHOUT; 2879 default: UNREACHABLE(qualifier); 2880 } 2881 2882 return sw::Shader::PARAMETER_VOID; 2883 } 2884 hasFlatQualifier(TIntermTyped * operand)2885 bool OutputASM::hasFlatQualifier(TIntermTyped *operand) 2886 { 2887 const TQualifier qualifier = operand->getQualifier(); 2888 return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn; 2889 } 2890 registerIndex(TIntermTyped * operand)2891 unsigned int OutputASM::registerIndex(TIntermTyped *operand) 2892 { 2893 if(isSamplerRegister(operand)) 2894 { 2895 return samplerRegister(operand); 2896 } 2897 else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler 2898 { 2899 samplerRegister(operand); // Make sure the sampler is declared 2900 } 2901 2902 switch(operand->getQualifier()) 2903 { 2904 case EvqTemporary: return temporaryRegister(operand); 2905 case EvqGlobal: return temporaryRegister(operand); 2906 case EvqConstExpr: return temporaryRegister(operand); // Unevaluated constant expression 2907 case EvqAttribute: return attributeRegister(operand); 2908 case EvqVaryingIn: return varyingRegister(operand); 2909 case EvqVaryingOut: return varyingRegister(operand); 2910 case EvqVertexIn: return attributeRegister(operand); 2911 case EvqFragmentOut: return fragmentOutputRegister(operand); 2912 case EvqVertexOut: return varyingRegister(operand); 2913 case EvqFragmentIn: return varyingRegister(operand); 2914 case EvqInvariantVaryingIn: return varyingRegister(operand); 2915 case EvqInvariantVaryingOut: return varyingRegister(operand); 2916 case EvqSmooth: return varyingRegister(operand); 2917 case EvqFlat: return varyingRegister(operand); 2918 case EvqCentroidOut: return varyingRegister(operand); 2919 case EvqSmoothIn: return varyingRegister(operand); 2920 case EvqFlatIn: return varyingRegister(operand); 2921 case EvqCentroidIn: return varyingRegister(operand); 2922 case EvqUniform: return uniformRegister(operand); 2923 case EvqIn: return temporaryRegister(operand); 2924 case EvqOut: return temporaryRegister(operand); 2925 case EvqInOut: return temporaryRegister(operand); 2926 case EvqConstReadOnly: return temporaryRegister(operand); 2927 case EvqPosition: return varyingRegister(operand); 2928 case EvqPointSize: return varyingRegister(operand); 2929 case EvqInstanceID: vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex; 2930 case EvqVertexID: vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex; 2931 case EvqFragCoord: pixelShader->declareVPos(); return sw::Shader::VPosIndex; 2932 case EvqFrontFacing: pixelShader->declareVFace(); return sw::Shader::VFaceIndex; 2933 case EvqPointCoord: return varyingRegister(operand); 2934 case EvqFragColor: return 0; 2935 case EvqFragData: return fragmentOutputRegister(operand); 2936 case EvqFragDepth: return 0; 2937 default: UNREACHABLE(operand->getQualifier()); 2938 } 2939 2940 return 0; 2941 } 2942 writeMask(TIntermTyped * destination,int index)2943 int OutputASM::writeMask(TIntermTyped *destination, int index) 2944 { 2945 if(destination->getQualifier() == EvqPointSize) 2946 { 2947 return 0x2; // Point size stored in the y component 2948 } 2949 2950 return 0xF >> (4 - registerSize(destination->getType(), index)); 2951 } 2952 readSwizzle(TIntermTyped * argument,int size)2953 int OutputASM::readSwizzle(TIntermTyped *argument, int size) 2954 { 2955 if(argument->getQualifier() == EvqPointSize) 2956 { 2957 return 0x55; // Point size stored in the y component 2958 } 2959 2960 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4}; // (void), xxxx, xyyy, xyzz, xyzw 2961 2962 return swizzleSize[size]; 2963 } 2964 2965 // Conservatively checks whether an expression is fast to compute and has no side effects trivial(TIntermTyped * expression,int budget)2966 bool OutputASM::trivial(TIntermTyped *expression, int budget) 2967 { 2968 if(!expression->isRegister()) 2969 { 2970 return false; 2971 } 2972 2973 return cost(expression, budget) >= 0; 2974 } 2975 2976 // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects) cost(TIntermNode * expression,int budget)2977 int OutputASM::cost(TIntermNode *expression, int budget) 2978 { 2979 if(budget < 0) 2980 { 2981 return budget; 2982 } 2983 2984 if(expression->getAsSymbolNode()) 2985 { 2986 return budget; 2987 } 2988 else if(expression->getAsConstantUnion()) 2989 { 2990 return budget; 2991 } 2992 else if(expression->getAsBinaryNode()) 2993 { 2994 TIntermBinary *binary = expression->getAsBinaryNode(); 2995 2996 switch(binary->getOp()) 2997 { 2998 case EOpVectorSwizzle: 2999 case EOpIndexDirect: 3000 case EOpIndexDirectStruct: 3001 case EOpIndexDirectInterfaceBlock: 3002 return cost(binary->getLeft(), budget - 0); 3003 case EOpAdd: 3004 case EOpSub: 3005 case EOpMul: 3006 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1)); 3007 default: 3008 return -1; 3009 } 3010 } 3011 else if(expression->getAsUnaryNode()) 3012 { 3013 TIntermUnary *unary = expression->getAsUnaryNode(); 3014 3015 switch(unary->getOp()) 3016 { 3017 case EOpAbs: 3018 case EOpNegative: 3019 return cost(unary->getOperand(), budget - 1); 3020 default: 3021 return -1; 3022 } 3023 } 3024 else if(expression->getAsSelectionNode()) 3025 { 3026 TIntermSelection *selection = expression->getAsSelectionNode(); 3027 3028 if(selection->usesTernaryOperator()) 3029 { 3030 TIntermTyped *condition = selection->getCondition(); 3031 TIntermNode *trueBlock = selection->getTrueBlock(); 3032 TIntermNode *falseBlock = selection->getFalseBlock(); 3033 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 3034 3035 if(constantCondition) 3036 { 3037 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 3038 3039 if(trueCondition) 3040 { 3041 return cost(trueBlock, budget - 0); 3042 } 3043 else 3044 { 3045 return cost(falseBlock, budget - 0); 3046 } 3047 } 3048 else 3049 { 3050 return cost(trueBlock, cost(falseBlock, budget - 2)); 3051 } 3052 } 3053 } 3054 3055 return -1; 3056 } 3057 findFunction(const TString & name)3058 const Function *OutputASM::findFunction(const TString &name) 3059 { 3060 for(unsigned int f = 0; f < functionArray.size(); f++) 3061 { 3062 if(functionArray[f].name == name) 3063 { 3064 return &functionArray[f]; 3065 } 3066 } 3067 3068 return 0; 3069 } 3070 temporaryRegister(TIntermTyped * temporary)3071 int OutputASM::temporaryRegister(TIntermTyped *temporary) 3072 { 3073 int index = allocate(temporaries, temporary); 3074 if(index >= sw::NUM_TEMPORARY_REGISTERS) 3075 { 3076 mContext.error(temporary->getLine(), 3077 "Too many temporary registers required to compile shader", 3078 pixelShader ? "pixel shader" : "vertex shader"); 3079 } 3080 return index; 3081 } 3082 setPixelShaderInputs(const TType & type,int var,bool flat)3083 void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat) 3084 { 3085 if(type.isStruct()) 3086 { 3087 const TFieldList &fields = type.getStruct()->fields(); 3088 int fieldVar = var; 3089 for(const auto &field : fields) 3090 { 3091 const TType& fieldType = *(field->type()); 3092 setPixelShaderInputs(fieldType, fieldVar, flat); 3093 fieldVar += fieldType.totalRegisterCount(); 3094 } 3095 } 3096 else 3097 { 3098 for(int i = 0; i < type.totalRegisterCount(); i++) 3099 { 3100 pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat)); 3101 } 3102 } 3103 } 3104 varyingRegister(TIntermTyped * varying)3105 int OutputASM::varyingRegister(TIntermTyped *varying) 3106 { 3107 int var = lookup(varyings, varying); 3108 3109 if(var == -1) 3110 { 3111 var = allocate(varyings, varying); 3112 int registerCount = varying->totalRegisterCount(); 3113 3114 if(pixelShader) 3115 { 3116 if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS) 3117 { 3118 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader"); 3119 return 0; 3120 } 3121 3122 if(varying->getQualifier() == EvqPointCoord) 3123 { 3124 ASSERT(varying->isRegister()); 3125 pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var)); 3126 } 3127 else 3128 { 3129 setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying)); 3130 } 3131 } 3132 else if(vertexShader) 3133 { 3134 if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS) 3135 { 3136 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader"); 3137 return 0; 3138 } 3139 3140 if(varying->getQualifier() == EvqPosition) 3141 { 3142 ASSERT(varying->isRegister()); 3143 vertexShader->setPositionRegister(var); 3144 } 3145 else if(varying->getQualifier() == EvqPointSize) 3146 { 3147 ASSERT(varying->isRegister()); 3148 vertexShader->setPointSizeRegister(var); 3149 } 3150 else 3151 { 3152 // Semantic indexes for user varyings will be assigned during program link to match the pixel shader 3153 } 3154 } 3155 else UNREACHABLE(0); 3156 3157 declareVarying(varying, var); 3158 } 3159 3160 return var; 3161 } 3162 declareVarying(TIntermTyped * varying,int reg)3163 void OutputASM::declareVarying(TIntermTyped *varying, int reg) 3164 { 3165 if(varying->getQualifier() != EvqPointCoord) // gl_PointCoord does not need linking 3166 { 3167 TIntermSymbol *symbol = varying->getAsSymbolNode(); 3168 declareVarying(varying->getType(), symbol->getSymbol(), reg); 3169 } 3170 } 3171 declareVarying(const TType & type,const TString & varyingName,int registerIndex)3172 void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex) 3173 { 3174 const char *name = varyingName.c_str(); 3175 VaryingList &activeVaryings = shaderObject->varyings; 3176 3177 TStructure* structure = type.getStruct(); 3178 if(structure) 3179 { 3180 int fieldRegisterIndex = registerIndex; 3181 3182 const TFieldList &fields = type.getStruct()->fields(); 3183 for(const auto &field : fields) 3184 { 3185 const TType& fieldType = *(field->type()); 3186 declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex); 3187 if(fieldRegisterIndex >= 0) 3188 { 3189 fieldRegisterIndex += fieldType.totalRegisterCount(); 3190 } 3191 } 3192 } 3193 else 3194 { 3195 // Check if this varying has been declared before without having a register assigned 3196 for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++) 3197 { 3198 if(v->name == name) 3199 { 3200 if(registerIndex >= 0) 3201 { 3202 ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex); 3203 v->registerIndex = registerIndex; 3204 } 3205 3206 return; 3207 } 3208 } 3209 3210 activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0)); 3211 } 3212 } 3213 declareFragmentOutput(TIntermTyped * fragmentOutput)3214 void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput) 3215 { 3216 int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location; 3217 int registerCount = fragmentOutput->totalRegisterCount(); 3218 if(requestedLocation < 0) 3219 { 3220 ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier 3221 return; // No requested location 3222 } 3223 else if((requestedLocation + registerCount) > sw::RENDERTARGETS) 3224 { 3225 mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader"); 3226 } 3227 else 3228 { 3229 int currentIndex = lookup(fragmentOutputs, fragmentOutput); 3230 if(requestedLocation != currentIndex) 3231 { 3232 if(currentIndex != -1) 3233 { 3234 mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader"); 3235 } 3236 else 3237 { 3238 if(fragmentOutputs.size() <= (size_t)requestedLocation) 3239 { 3240 while(fragmentOutputs.size() < (size_t)requestedLocation) 3241 { 3242 fragmentOutputs.push_back(nullptr); 3243 } 3244 for(int i = 0; i < registerCount; i++) 3245 { 3246 fragmentOutputs.push_back(fragmentOutput); 3247 } 3248 } 3249 else 3250 { 3251 for(int i = 0; i < registerCount; i++) 3252 { 3253 if(!fragmentOutputs[requestedLocation + i]) 3254 { 3255 fragmentOutputs[requestedLocation + i] = fragmentOutput; 3256 } 3257 else 3258 { 3259 mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader"); 3260 return; 3261 } 3262 } 3263 } 3264 } 3265 } 3266 } 3267 } 3268 uniformRegister(TIntermTyped * uniform)3269 int OutputASM::uniformRegister(TIntermTyped *uniform) 3270 { 3271 const TType &type = uniform->getType(); 3272 ASSERT(!IsSampler(type.getBasicType())); 3273 TInterfaceBlock *block = type.getAsInterfaceBlock(); 3274 TIntermSymbol *symbol = uniform->getAsSymbolNode(); 3275 ASSERT(symbol || block); 3276 3277 if(symbol || block) 3278 { 3279 TInterfaceBlock* parentBlock = type.getInterfaceBlock(); 3280 bool isBlockMember = (!block && parentBlock); 3281 int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform); 3282 3283 if(index == -1 || isBlockMember) 3284 { 3285 if(index == -1) 3286 { 3287 index = allocate(uniforms, uniform); 3288 } 3289 3290 // Verify if the current uniform is a member of an already declared block 3291 const TString &name = symbol ? symbol->getSymbol() : block->name(); 3292 int blockMemberIndex = blockMemberLookup(type, name, index); 3293 if(blockMemberIndex == -1) 3294 { 3295 declareUniform(type, name, index, false); 3296 } 3297 else 3298 { 3299 index = blockMemberIndex; 3300 } 3301 } 3302 3303 return index; 3304 } 3305 3306 return 0; 3307 } 3308 attributeRegister(TIntermTyped * attribute)3309 int OutputASM::attributeRegister(TIntermTyped *attribute) 3310 { 3311 ASSERT(!attribute->isArray()); 3312 3313 int index = lookup(attributes, attribute); 3314 3315 if(index == -1) 3316 { 3317 TIntermSymbol *symbol = attribute->getAsSymbolNode(); 3318 ASSERT(symbol); 3319 3320 if(symbol) 3321 { 3322 index = allocate(attributes, attribute); 3323 const TType &type = attribute->getType(); 3324 int registerCount = attribute->totalRegisterCount(); 3325 sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT; 3326 switch(type.getBasicType()) 3327 { 3328 case EbtInt: 3329 attribType = sw::VertexShader::ATTRIBTYPE_INT; 3330 break; 3331 case EbtUInt: 3332 attribType = sw::VertexShader::ATTRIBTYPE_UINT; 3333 break; 3334 case EbtFloat: 3335 default: 3336 break; 3337 } 3338 3339 if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS) 3340 { 3341 for(int i = 0; i < registerCount; i++) 3342 { 3343 vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType); 3344 } 3345 } 3346 3347 ActiveAttributes &activeAttributes = shaderObject->activeAttributes; 3348 3349 const char *name = symbol->getSymbol().c_str(); 3350 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index)); 3351 } 3352 } 3353 3354 return index; 3355 } 3356 fragmentOutputRegister(TIntermTyped * fragmentOutput)3357 int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput) 3358 { 3359 return allocate(fragmentOutputs, fragmentOutput); 3360 } 3361 samplerRegister(TIntermTyped * sampler)3362 int OutputASM::samplerRegister(TIntermTyped *sampler) 3363 { 3364 const TType &type = sampler->getType(); 3365 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3366 3367 TIntermSymbol *symbol = sampler->getAsSymbolNode(); 3368 TIntermBinary *binary = sampler->getAsBinaryNode(); 3369 3370 if(symbol) 3371 { 3372 switch(type.getQualifier()) 3373 { 3374 case EvqUniform: 3375 return samplerRegister(symbol); 3376 case EvqIn: 3377 case EvqConstReadOnly: 3378 // Function arguments are not (uniform) sampler registers 3379 return -1; 3380 default: 3381 UNREACHABLE(type.getQualifier()); 3382 } 3383 } 3384 else if(binary) 3385 { 3386 TIntermTyped *left = binary->getLeft(); 3387 TIntermTyped *right = binary->getRight(); 3388 const TType &leftType = left->getType(); 3389 int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0; 3390 int offset = 0; 3391 3392 switch(binary->getOp()) 3393 { 3394 case EOpIndexDirect: 3395 ASSERT(left->isArray()); 3396 offset = index * leftType.samplerRegisterCount(); 3397 break; 3398 case EOpIndexDirectStruct: 3399 ASSERT(leftType.isStruct()); 3400 { 3401 const TFieldList &fields = leftType.getStruct()->fields(); 3402 3403 for(int i = 0; i < index; i++) 3404 { 3405 offset += fields[i]->type()->totalSamplerRegisterCount(); 3406 } 3407 } 3408 break; 3409 case EOpIndexIndirect: // Indirect indexing produces a temporary, not a sampler register 3410 return -1; 3411 case EOpIndexDirectInterfaceBlock: // Interface blocks can't contain samplers 3412 default: 3413 UNREACHABLE(binary->getOp()); 3414 return -1; 3415 } 3416 3417 int base = samplerRegister(left); 3418 3419 if(base < 0) 3420 { 3421 return -1; 3422 } 3423 3424 return base + offset; 3425 } 3426 3427 UNREACHABLE(0); 3428 return -1; // Not a (uniform) sampler register 3429 } 3430 samplerRegister(TIntermSymbol * sampler)3431 int OutputASM::samplerRegister(TIntermSymbol *sampler) 3432 { 3433 const TType &type = sampler->getType(); 3434 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3435 3436 int index = lookup(samplers, sampler); 3437 3438 if(index == -1) 3439 { 3440 index = allocate(samplers, sampler, true); 3441 3442 if(sampler->getQualifier() == EvqUniform) 3443 { 3444 const char *name = sampler->getSymbol().c_str(); 3445 declareUniform(type, name, index, true); 3446 } 3447 } 3448 3449 return index; 3450 } 3451 isSamplerRegister(TIntermTyped * operand)3452 bool OutputASM::isSamplerRegister(TIntermTyped *operand) 3453 { 3454 return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0; 3455 } 3456 lookup(VariableArray & list,TIntermTyped * variable)3457 int OutputASM::lookup(VariableArray &list, TIntermTyped *variable) 3458 { 3459 for(unsigned int i = 0; i < list.size(); i++) 3460 { 3461 if(list[i] == variable) 3462 { 3463 return i; // Pointer match 3464 } 3465 } 3466 3467 TIntermSymbol *varSymbol = variable->getAsSymbolNode(); 3468 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock(); 3469 3470 if(varBlock) 3471 { 3472 for(unsigned int i = 0; i < list.size(); i++) 3473 { 3474 if(list[i]) 3475 { 3476 TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock(); 3477 3478 if(listBlock) 3479 { 3480 if(listBlock->name() == varBlock->name()) 3481 { 3482 ASSERT(listBlock->arraySize() == varBlock->arraySize()); 3483 ASSERT(listBlock->fields() == varBlock->fields()); 3484 ASSERT(listBlock->blockStorage() == varBlock->blockStorage()); 3485 ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking()); 3486 3487 return i; 3488 } 3489 } 3490 } 3491 } 3492 } 3493 else if(varSymbol) 3494 { 3495 for(unsigned int i = 0; i < list.size(); i++) 3496 { 3497 if(list[i]) 3498 { 3499 TIntermSymbol *listSymbol = list[i]->getAsSymbolNode(); 3500 3501 if(listSymbol) 3502 { 3503 if(listSymbol->getId() == varSymbol->getId()) 3504 { 3505 ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol()); 3506 ASSERT(listSymbol->getType() == varSymbol->getType()); 3507 ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier()); 3508 3509 return i; 3510 } 3511 } 3512 } 3513 } 3514 } 3515 3516 return -1; 3517 } 3518 lookup(VariableArray & list,TInterfaceBlock * block)3519 int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block) 3520 { 3521 for(unsigned int i = 0; i < list.size(); i++) 3522 { 3523 if(list[i] && (list[i]->getType().getInterfaceBlock() == block)) 3524 { 3525 return i; // Pointer match 3526 } 3527 } 3528 return -1; 3529 } 3530 allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3531 int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly) 3532 { 3533 int index = lookup(list, variable); 3534 3535 if(index == -1) 3536 { 3537 unsigned int registerCount = variable->blockRegisterCount(samplersOnly); 3538 3539 for(unsigned int i = 0; i < list.size(); i++) 3540 { 3541 if(list[i] == 0) 3542 { 3543 unsigned int j = 1; 3544 for( ; j < registerCount && (i + j) < list.size(); j++) 3545 { 3546 if(list[i + j] != 0) 3547 { 3548 break; 3549 } 3550 } 3551 3552 if(j == registerCount) // Found free slots 3553 { 3554 for(unsigned int j = 0; j < registerCount; j++) 3555 { 3556 list[i + j] = variable; 3557 } 3558 3559 return i; 3560 } 3561 } 3562 } 3563 3564 index = list.size(); 3565 3566 for(unsigned int i = 0; i < registerCount; i++) 3567 { 3568 list.push_back(variable); 3569 } 3570 } 3571 3572 return index; 3573 } 3574 free(VariableArray & list,TIntermTyped * variable)3575 void OutputASM::free(VariableArray &list, TIntermTyped *variable) 3576 { 3577 int index = lookup(list, variable); 3578 3579 if(index >= 0) 3580 { 3581 list[index] = 0; 3582 } 3583 } 3584 blockMemberLookup(const TType & type,const TString & name,int registerIndex)3585 int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex) 3586 { 3587 const TInterfaceBlock *block = type.getInterfaceBlock(); 3588 3589 if(block) 3590 { 3591 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3592 const TFieldList& fields = block->fields(); 3593 const TString &blockName = block->name(); 3594 int fieldRegisterIndex = registerIndex; 3595 3596 if(!type.isInterfaceBlock()) 3597 { 3598 // This is a uniform that's part of a block, let's see if the block is already defined 3599 for(size_t i = 0; i < activeUniformBlocks.size(); ++i) 3600 { 3601 if(activeUniformBlocks[i].name == blockName.c_str()) 3602 { 3603 // The block is already defined, find the register for the current uniform and return it 3604 for(size_t j = 0; j < fields.size(); j++) 3605 { 3606 const TString &fieldName = fields[j]->name(); 3607 if(fieldName == name) 3608 { 3609 return fieldRegisterIndex; 3610 } 3611 3612 fieldRegisterIndex += fields[j]->type()->totalRegisterCount(); 3613 } 3614 3615 ASSERT(false); 3616 return fieldRegisterIndex; 3617 } 3618 } 3619 } 3620 } 3621 3622 return -1; 3623 } 3624 declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3625 void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder) 3626 { 3627 const TStructure *structure = type.getStruct(); 3628 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr; 3629 3630 if(!structure && !block) 3631 { 3632 ActiveUniforms &activeUniforms = shaderObject->activeUniforms; 3633 const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo(); 3634 if(blockId >= 0) 3635 { 3636 blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type))); 3637 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size()); 3638 } 3639 int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex; 3640 bool isSampler = IsSampler(type.getBasicType()); 3641 if(isSampler && samplersOnly) 3642 { 3643 for(int i = 0; i < type.totalRegisterCount(); i++) 3644 { 3645 shader->declareSampler(fieldRegisterIndex + i); 3646 } 3647 } 3648 if(isSampler == samplersOnly) 3649 { 3650 activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo)); 3651 } 3652 } 3653 else if(block) 3654 { 3655 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3656 const TFieldList& fields = block->fields(); 3657 const TString &blockName = block->name(); 3658 int fieldRegisterIndex = registerIndex; 3659 bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1); 3660 3661 blockId = activeUniformBlocks.size(); 3662 bool isRowMajor = block->matrixPacking() == EmpRowMajor; 3663 activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(), 3664 block->blockStorage(), isRowMajor, registerIndex, blockId)); 3665 blockDefinitions.push_back(BlockDefinitionIndexMap()); 3666 3667 Std140BlockEncoder currentBlockEncoder; 3668 currentBlockEncoder.enterAggregateType(); 3669 for(const auto &field : fields) 3670 { 3671 const TType &fieldType = *(field->type()); 3672 const TString &fieldName = field->name(); 3673 if(isUniformBlockMember && (fieldName == name)) 3674 { 3675 registerIndex = fieldRegisterIndex; 3676 } 3677 3678 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName; 3679 3680 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, ¤tBlockEncoder); 3681 fieldRegisterIndex += fieldType.totalRegisterCount(); 3682 } 3683 currentBlockEncoder.exitAggregateType(); 3684 activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize(); 3685 } 3686 else 3687 { 3688 // Store struct for program link time validation 3689 shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo())); 3690 3691 int fieldRegisterIndex = registerIndex; 3692 3693 const TFieldList& fields = structure->fields(); 3694 if(type.isArray() && (structure || type.isInterfaceBlock())) 3695 { 3696 for(int i = 0; i < type.getArraySize(); i++) 3697 { 3698 if(encoder) 3699 { 3700 encoder->enterAggregateType(); 3701 } 3702 for(const auto &field : fields) 3703 { 3704 const TType &fieldType = *(field->type()); 3705 const TString &fieldName = field->name(); 3706 const TString uniformName = name + "[" + str(i) + "]." + fieldName; 3707 3708 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder); 3709 fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount(); 3710 } 3711 if(encoder) 3712 { 3713 encoder->exitAggregateType(); 3714 } 3715 } 3716 } 3717 else 3718 { 3719 if(encoder) 3720 { 3721 encoder->enterAggregateType(); 3722 } 3723 for(const auto &field : fields) 3724 { 3725 const TType &fieldType = *(field->type()); 3726 const TString &fieldName = field->name(); 3727 const TString uniformName = name + "." + fieldName; 3728 3729 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder); 3730 fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount(); 3731 } 3732 if(encoder) 3733 { 3734 encoder->exitAggregateType(); 3735 } 3736 } 3737 } 3738 } 3739 dim(TIntermNode * v)3740 int OutputASM::dim(TIntermNode *v) 3741 { 3742 TIntermTyped *vector = v->getAsTyped(); 3743 ASSERT(vector && vector->isRegister()); 3744 return vector->getNominalSize(); 3745 } 3746 dim2(TIntermNode * m)3747 int OutputASM::dim2(TIntermNode *m) 3748 { 3749 TIntermTyped *matrix = m->getAsTyped(); 3750 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray()); 3751 return matrix->getSecondarySize(); 3752 } 3753 3754 // Sets iterations to ~0u if no loop count could be statically determined. LoopInfo(TIntermLoop * node)3755 OutputASM::LoopInfo::LoopInfo(TIntermLoop *node) 3756 { 3757 // Parse loops of the form: 3758 // for(int index = initial; index [comparator] limit; index [op] increment) 3759 3760 // Parse index name and intial value 3761 if(node->getInit()) 3762 { 3763 TIntermAggregate *init = node->getInit()->getAsAggregate(); 3764 3765 if(init) 3766 { 3767 TIntermSequence &sequence = init->getSequence(); 3768 TIntermTyped *variable = sequence[0]->getAsTyped(); 3769 3770 if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt) 3771 { 3772 TIntermBinary *assign = variable->getAsBinaryNode(); 3773 3774 if(assign && assign->getOp() == EOpInitialize) 3775 { 3776 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode(); 3777 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion(); 3778 3779 if(symbol && constant) 3780 { 3781 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3782 { 3783 index = symbol; 3784 initial = constant->getUnionArrayPointer()[0].getIConst(); 3785 } 3786 } 3787 } 3788 } 3789 } 3790 } 3791 3792 // Parse comparator and limit value 3793 if(index && node->getCondition()) 3794 { 3795 TIntermBinary *test = node->getCondition()->getAsBinaryNode(); 3796 TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr; 3797 3798 if(left && (left->getId() == index->getId())) 3799 { 3800 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion(); 3801 3802 if(constant) 3803 { 3804 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3805 { 3806 comparator = test->getOp(); 3807 limit = constant->getUnionArrayPointer()[0].getIConst(); 3808 } 3809 } 3810 } 3811 } 3812 3813 // Parse increment 3814 if(index && comparator != EOpNull && node->getExpression()) 3815 { 3816 TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode(); 3817 TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode(); 3818 3819 if(binaryTerminal) 3820 { 3821 TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode(); 3822 3823 if(operand && operand->getId() == index->getId()) 3824 { 3825 TOperator op = binaryTerminal->getOp(); 3826 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion(); 3827 3828 if(constant) 3829 { 3830 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3831 { 3832 int value = constant->getUnionArrayPointer()[0].getIConst(); 3833 3834 switch(op) 3835 { 3836 case EOpAddAssign: increment = value; break; 3837 case EOpSubAssign: increment = -value; break; 3838 default: increment = 0; break; // Rare cases left unhandled. Treated as non-deterministic. 3839 } 3840 } 3841 } 3842 } 3843 } 3844 else if(unaryTerminal) 3845 { 3846 TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode(); 3847 3848 if(operand && operand->getId() == index->getId()) 3849 { 3850 TOperator op = unaryTerminal->getOp(); 3851 3852 switch(op) 3853 { 3854 case EOpPostIncrement: increment = 1; break; 3855 case EOpPostDecrement: increment = -1; break; 3856 case EOpPreIncrement: increment = 1; break; 3857 case EOpPreDecrement: increment = -1; break; 3858 default: increment = 0; break; // Rare cases left unhandled. Treated as non-deterministic. 3859 } 3860 } 3861 } 3862 } 3863 3864 if(index && comparator != EOpNull && increment != 0) 3865 { 3866 // Check the loop body for return statements or changes to the index variable that make it non-deterministic. 3867 LoopUnrollable loopUnrollable; 3868 bool unrollable = loopUnrollable.traverse(node, index->getId()); 3869 3870 if(!unrollable) 3871 { 3872 iterations = ~0u; 3873 return; 3874 } 3875 3876 if(comparator == EOpLessThanEqual) 3877 { 3878 comparator = EOpLessThan; 3879 limit += 1; 3880 } 3881 else if(comparator == EOpGreaterThanEqual) 3882 { 3883 comparator = EOpLessThan; 3884 limit -= 1; 3885 std::swap(initial, limit); 3886 increment = -increment; 3887 } 3888 else if(comparator == EOpGreaterThan) 3889 { 3890 comparator = EOpLessThan; 3891 std::swap(initial, limit); 3892 increment = -increment; 3893 } 3894 3895 if(comparator == EOpLessThan) 3896 { 3897 if(!(initial < limit)) // Never loops 3898 { 3899 iterations = 0; 3900 } 3901 else if(increment < 0) 3902 { 3903 iterations = ~0u; 3904 } 3905 else 3906 { 3907 iterations = (limit - initial + abs(increment) - 1) / increment; // Ceiling division 3908 } 3909 } 3910 else 3911 { 3912 // Rare cases left unhandled. Treated as non-deterministic. 3913 iterations = ~0u; 3914 } 3915 } 3916 } 3917 traverse(TIntermLoop * loop,int indexId)3918 bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId) 3919 { 3920 loopUnrollable = true; 3921 3922 loopIndexId = indexId; 3923 TIntermNode *body = loop->getBody(); 3924 3925 if(body) 3926 { 3927 body->traverse(this); 3928 } 3929 3930 return loopUnrollable; 3931 } 3932 visitSymbol(TIntermSymbol * node)3933 void LoopUnrollable::visitSymbol(TIntermSymbol *node) 3934 { 3935 // Check that the loop index is not used as the argument to a function out or inout parameter. 3936 if(node->getId() == loopIndexId) 3937 { 3938 if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut) 3939 { 3940 loopUnrollable = false; 3941 } 3942 } 3943 } 3944 visitBinary(Visit visit,TIntermBinary * node)3945 bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node) 3946 { 3947 if(!loopUnrollable) 3948 { 3949 return false; 3950 } 3951 3952 // Check that the loop index is not statically assigned to. 3953 TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode(); 3954 loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId)); 3955 3956 return loopUnrollable; 3957 } 3958 visitUnary(Visit visit,TIntermUnary * node)3959 bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node) 3960 { 3961 if(!loopUnrollable) 3962 { 3963 return false; 3964 } 3965 3966 // Check that the loop index is not statically assigned to. 3967 TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode(); 3968 loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId)); 3969 3970 return loopUnrollable; 3971 } 3972 visitBranch(Visit visit,TIntermBranch * node)3973 bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node) 3974 { 3975 if(!loopUnrollable) 3976 { 3977 return false; 3978 } 3979 3980 switch(node->getFlowOp()) 3981 { 3982 case EOpKill: 3983 case EOpReturn: 3984 case EOpBreak: 3985 case EOpContinue: 3986 loopUnrollable = false; 3987 break; 3988 default: UNREACHABLE(node->getFlowOp()); 3989 } 3990 3991 return loopUnrollable; 3992 } 3993 visitAggregate(Visit visit,TIntermAggregate * node)3994 bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node) 3995 { 3996 return loopUnrollable; 3997 } 3998 } 3999