1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "OutputASM.h" 16 #include "Common/Math.hpp" 17 18 #include "common/debug.h" 19 #include "InfoSink.h" 20 21 #include "libGLESv2/Shader.h" 22 23 #include <GLES2/gl2.h> 24 #include <GLES2/gl2ext.h> 25 #include <GLES3/gl3.h> 26 #include <GL/glcorearb.h> 27 #include <GL/glext.h> 28 29 #include <stdlib.h> 30 31 namespace 32 { glVariableType(const TType & type)33 GLenum glVariableType(const TType &type) 34 { 35 switch(type.getBasicType()) 36 { 37 case EbtFloat: 38 if(type.isScalar()) 39 { 40 return GL_FLOAT; 41 } 42 else if(type.isVector()) 43 { 44 switch(type.getNominalSize()) 45 { 46 case 2: return GL_FLOAT_VEC2; 47 case 3: return GL_FLOAT_VEC3; 48 case 4: return GL_FLOAT_VEC4; 49 default: UNREACHABLE(type.getNominalSize()); 50 } 51 } 52 else if(type.isMatrix()) 53 { 54 switch(type.getNominalSize()) 55 { 56 case 2: 57 switch(type.getSecondarySize()) 58 { 59 case 2: return GL_FLOAT_MAT2; 60 case 3: return GL_FLOAT_MAT2x3; 61 case 4: return GL_FLOAT_MAT2x4; 62 default: UNREACHABLE(type.getSecondarySize()); 63 } 64 case 3: 65 switch(type.getSecondarySize()) 66 { 67 case 2: return GL_FLOAT_MAT3x2; 68 case 3: return GL_FLOAT_MAT3; 69 case 4: return GL_FLOAT_MAT3x4; 70 default: UNREACHABLE(type.getSecondarySize()); 71 } 72 case 4: 73 switch(type.getSecondarySize()) 74 { 75 case 2: return GL_FLOAT_MAT4x2; 76 case 3: return 
GL_FLOAT_MAT4x3; 77 case 4: return GL_FLOAT_MAT4; 78 default: UNREACHABLE(type.getSecondarySize()); 79 } 80 default: UNREACHABLE(type.getNominalSize()); 81 } 82 } 83 else UNREACHABLE(0); 84 break; 85 case EbtInt: 86 if(type.isScalar()) 87 { 88 return GL_INT; 89 } 90 else if(type.isVector()) 91 { 92 switch(type.getNominalSize()) 93 { 94 case 2: return GL_INT_VEC2; 95 case 3: return GL_INT_VEC3; 96 case 4: return GL_INT_VEC4; 97 default: UNREACHABLE(type.getNominalSize()); 98 } 99 } 100 else UNREACHABLE(0); 101 break; 102 case EbtUInt: 103 if(type.isScalar()) 104 { 105 return GL_UNSIGNED_INT; 106 } 107 else if(type.isVector()) 108 { 109 switch(type.getNominalSize()) 110 { 111 case 2: return GL_UNSIGNED_INT_VEC2; 112 case 3: return GL_UNSIGNED_INT_VEC3; 113 case 4: return GL_UNSIGNED_INT_VEC4; 114 default: UNREACHABLE(type.getNominalSize()); 115 } 116 } 117 else UNREACHABLE(0); 118 break; 119 case EbtBool: 120 if(type.isScalar()) 121 { 122 return GL_BOOL; 123 } 124 else if(type.isVector()) 125 { 126 switch(type.getNominalSize()) 127 { 128 case 2: return GL_BOOL_VEC2; 129 case 3: return GL_BOOL_VEC3; 130 case 4: return GL_BOOL_VEC4; 131 default: UNREACHABLE(type.getNominalSize()); 132 } 133 } 134 else UNREACHABLE(0); 135 break; 136 case EbtSampler2D: 137 return GL_SAMPLER_2D; 138 case EbtISampler2D: 139 return GL_INT_SAMPLER_2D; 140 case EbtUSampler2D: 141 return GL_UNSIGNED_INT_SAMPLER_2D; 142 case EbtSamplerCube: 143 return GL_SAMPLER_CUBE; 144 case EbtSampler2DRect: 145 return GL_SAMPLER_2D_RECT_ARB; 146 case EbtISamplerCube: 147 return GL_INT_SAMPLER_CUBE; 148 case EbtUSamplerCube: 149 return GL_UNSIGNED_INT_SAMPLER_CUBE; 150 case EbtSamplerExternalOES: 151 return GL_SAMPLER_EXTERNAL_OES; 152 case EbtSampler3D: 153 return GL_SAMPLER_3D_OES; 154 case EbtISampler3D: 155 return GL_INT_SAMPLER_3D; 156 case EbtUSampler3D: 157 return GL_UNSIGNED_INT_SAMPLER_3D; 158 case EbtSampler2DArray: 159 return GL_SAMPLER_2D_ARRAY; 160 case EbtISampler2DArray: 161 return 
GL_INT_SAMPLER_2D_ARRAY; 162 case EbtUSampler2DArray: 163 return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY; 164 case EbtSampler2DShadow: 165 return GL_SAMPLER_2D_SHADOW; 166 case EbtSamplerCubeShadow: 167 return GL_SAMPLER_CUBE_SHADOW; 168 case EbtSampler2DArrayShadow: 169 return GL_SAMPLER_2D_ARRAY_SHADOW; 170 default: 171 UNREACHABLE(type.getBasicType()); 172 break; 173 } 174 175 return GL_NONE; 176 } 177 glVariablePrecision(const TType & type)178 GLenum glVariablePrecision(const TType &type) 179 { 180 if(type.getBasicType() == EbtFloat) 181 { 182 switch(type.getPrecision()) 183 { 184 case EbpHigh: return GL_HIGH_FLOAT; 185 case EbpMedium: return GL_MEDIUM_FLOAT; 186 case EbpLow: return GL_LOW_FLOAT; 187 case EbpUndefined: 188 // Should be defined as the default precision by the parser 189 default: UNREACHABLE(type.getPrecision()); 190 } 191 } 192 else if(type.getBasicType() == EbtInt) 193 { 194 switch(type.getPrecision()) 195 { 196 case EbpHigh: return GL_HIGH_INT; 197 case EbpMedium: return GL_MEDIUM_INT; 198 case EbpLow: return GL_LOW_INT; 199 case EbpUndefined: 200 // Should be defined as the default precision by the parser 201 default: UNREACHABLE(type.getPrecision()); 202 } 203 } 204 205 // Other types (boolean, sampler) don't have a precision 206 return GL_NONE; 207 } 208 } 209 210 namespace glsl 211 { 212 // Integer to TString conversion str(int i)213 TString str(int i) 214 { 215 char buffer[20]; 216 sprintf(buffer, "%d", i); 217 return buffer; 218 } 219 220 class Temporary : public TIntermSymbol 221 { 222 public: Temporary(OutputASM * assembler)223 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler) 224 { 225 } 226 ~Temporary()227 ~Temporary() 228 { 229 assembler->freeTemporary(this); 230 } 231 232 private: 233 OutputASM *const assembler; 234 }; 235 236 class Constant : public TIntermConstantUnion 237 { 238 public: Constant(float x,float y,float 
z,float w)239 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false)) 240 { 241 constants[0].setFConst(x); 242 constants[1].setFConst(y); 243 constants[2].setFConst(z); 244 constants[3].setFConst(w); 245 } 246 Constant(bool b)247 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false)) 248 { 249 constants[0].setBConst(b); 250 } 251 Constant(int i)252 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false)) 253 { 254 constants[0].setIConst(i); 255 } 256 ~Constant()257 ~Constant() 258 { 259 } 260 261 private: 262 ConstantUnion constants[4]; 263 }; 264 ShaderVariable(const TType & type,const std::string & name,int registerIndex)265 ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) : 266 type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)), 267 name(name), arraySize(type.getArraySize()), registerIndex(registerIndex) 268 { 269 if(type.isStruct()) 270 { 271 for(const auto& field : type.getStruct()->fields()) 272 { 273 fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1)); 274 } 275 } 276 } 277 Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)278 Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) : 279 ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo) 280 { 281 } 282 UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)283 UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize, 284 TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) : 
285 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout), 286 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId) 287 { 288 } 289 BlockLayoutEncoder()290 BlockLayoutEncoder::BlockLayoutEncoder() 291 : mCurrentOffset(0) 292 { 293 } 294 encodeType(const TType & type)295 BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type) 296 { 297 int arrayStride; 298 int matrixStride; 299 300 bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor; 301 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride); 302 303 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent), 304 static_cast<int>(arrayStride * BytesPerComponent), 305 static_cast<int>(matrixStride * BytesPerComponent), 306 (matrixStride > 0) && isRowMajor); 307 308 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride); 309 310 return memberInfo; 311 } 312 313 // static getBlockRegister(const BlockMemberInfo & info)314 size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info) 315 { 316 return (info.offset / BytesPerComponent) / ComponentsPerRegister; 317 } 318 319 // static getBlockRegisterElement(const BlockMemberInfo & info)320 size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info) 321 { 322 return (info.offset / BytesPerComponent) % ComponentsPerRegister; 323 } 324 nextRegister()325 void BlockLayoutEncoder::nextRegister() 326 { 327 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister); 328 } 329 Std140BlockEncoder()330 Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder() 331 { 332 } 333 enterAggregateType()334 void Std140BlockEncoder::enterAggregateType() 335 { 336 nextRegister(); 337 } 338 exitAggregateType()339 void Std140BlockEncoder::exitAggregateType() 340 { 341 nextRegister(); 342 } 343 getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * 
arrayStrideOut,int * matrixStrideOut)344 void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut) 345 { 346 size_t baseAlignment = 0; 347 int matrixStride = 0; 348 int arrayStride = 0; 349 350 if(type.isMatrix()) 351 { 352 baseAlignment = ComponentsPerRegister; 353 matrixStride = ComponentsPerRegister; 354 355 if(arraySize > 0) 356 { 357 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 358 arrayStride = ComponentsPerRegister * numRegisters; 359 } 360 } 361 else if(arraySize > 0) 362 { 363 baseAlignment = ComponentsPerRegister; 364 arrayStride = ComponentsPerRegister; 365 } 366 else 367 { 368 const size_t numComponents = type.getElementSize(); 369 baseAlignment = (numComponents == 3 ? 4u : numComponents); 370 } 371 372 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment); 373 374 *matrixStrideOut = matrixStride; 375 *arrayStrideOut = arrayStride; 376 } 377 advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)378 void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride) 379 { 380 if(arraySize > 0) 381 { 382 mCurrentOffset += arrayStride * arraySize; 383 } 384 else if(type.isMatrix()) 385 { 386 ASSERT(matrixStride == ComponentsPerRegister); 387 const int numRegisters = isRowMajorMatrix ? 
type.getSecondarySize() : type.getNominalSize(); 388 mCurrentOffset += ComponentsPerRegister * numRegisters; 389 } 390 else 391 { 392 mCurrentOffset += type.getElementSize(); 393 } 394 } 395 Attribute()396 Attribute::Attribute() 397 { 398 type = GL_NONE; 399 arraySize = 0; 400 registerIndex = 0; 401 } 402 Attribute(GLenum type,const std::string & name,int arraySize,int layoutLocation,int registerIndex)403 Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex) 404 { 405 this->type = type; 406 this->name = name; 407 this->arraySize = arraySize; 408 this->layoutLocation = layoutLocation; 409 this->registerIndex = registerIndex; 410 } 411 getPixelShader() const412 sw::PixelShader *Shader::getPixelShader() const 413 { 414 return nullptr; 415 } 416 getVertexShader() const417 sw::VertexShader *Shader::getVertexShader() const 418 { 419 return nullptr; 420 } 421 TextureFunction(const TString & nodeName)422 OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false) 423 { 424 TString name = TFunction::unmangleName(nodeName); 425 426 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect") 427 { 428 method = IMPLICIT; 429 } 430 else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj") 431 { 432 method = IMPLICIT; 433 proj = true; 434 } 435 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod") 436 { 437 method = LOD; 438 } 439 else if(name == "texture2DProjLod" || name == "textureProjLod") 440 { 441 method = LOD; 442 proj = true; 443 } 444 else if(name == "textureSize") 445 { 446 method = SIZE; 447 } 448 else if(name == "textureOffset") 449 { 450 method = IMPLICIT; 451 offset = true; 452 } 453 else if(name == "textureProjOffset") 454 { 455 method = IMPLICIT; 456 offset = true; 457 proj = true; 458 } 459 else if(name == "textureLodOffset") 460 
{ 461 method = LOD; 462 offset = true; 463 } 464 else if(name == "textureProjLodOffset") 465 { 466 method = LOD; 467 proj = true; 468 offset = true; 469 } 470 else if(name == "texelFetch") 471 { 472 method = FETCH; 473 } 474 else if(name == "texelFetchOffset") 475 { 476 method = FETCH; 477 offset = true; 478 } 479 else if(name == "textureGrad") 480 { 481 method = GRAD; 482 } 483 else if(name == "textureGradOffset") 484 { 485 method = GRAD; 486 offset = true; 487 } 488 else if(name == "textureProjGrad") 489 { 490 method = GRAD; 491 proj = true; 492 } 493 else if(name == "textureProjGradOffset") 494 { 495 method = GRAD; 496 proj = true; 497 offset = true; 498 } 499 else UNREACHABLE(0); 500 } 501 OutputASM(TParseContext & context,Shader * shaderObject)502 OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context) 503 { 504 shader = nullptr; 505 pixelShader = nullptr; 506 vertexShader = nullptr; 507 508 if(shaderObject) 509 { 510 shader = shaderObject->getShader(); 511 pixelShader = shaderObject->getPixelShader(); 512 vertexShader = shaderObject->getVertexShader(); 513 } 514 515 functionArray.push_back(Function(0, "main(", nullptr, nullptr)); 516 currentFunction = 0; 517 outputQualifier = EvqOutput; // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData 518 } 519 ~OutputASM()520 OutputASM::~OutputASM() 521 { 522 } 523 output()524 void OutputASM::output() 525 { 526 if(shader) 527 { 528 emitShader(GLOBAL); 529 530 if(functionArray.size() > 1) // Only call main() when there are other functions 531 { 532 Instruction *callMain = emit(sw::Shader::OPCODE_CALL); 533 callMain->dst.type = sw::Shader::PARAMETER_LABEL; 534 callMain->dst.index = 0; // main() 535 536 emit(sw::Shader::OPCODE_RET); 537 } 538 539 emitShader(FUNCTION); 540 } 541 } 542 emitShader(Scope scope)543 void OutputASM::emitShader(Scope scope) 544 { 545 emitScope = scope; 546 currentScope = 
GLOBAL; 547 mContext.getTreeRoot()->traverse(this); 548 } 549 freeTemporary(Temporary * temporary)550 void OutputASM::freeTemporary(Temporary *temporary) 551 { 552 free(temporaries, temporary); 553 } 554 getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const555 sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const 556 { 557 TBasicType baseType = in->getType().getBasicType(); 558 559 switch(op) 560 { 561 case sw::Shader::OPCODE_NEG: 562 switch(baseType) 563 { 564 case EbtInt: 565 case EbtUInt: 566 return sw::Shader::OPCODE_INEG; 567 case EbtFloat: 568 default: 569 return op; 570 } 571 case sw::Shader::OPCODE_ABS: 572 switch(baseType) 573 { 574 case EbtInt: 575 return sw::Shader::OPCODE_IABS; 576 case EbtFloat: 577 default: 578 return op; 579 } 580 case sw::Shader::OPCODE_SGN: 581 switch(baseType) 582 { 583 case EbtInt: 584 return sw::Shader::OPCODE_ISGN; 585 case EbtFloat: 586 default: 587 return op; 588 } 589 case sw::Shader::OPCODE_ADD: 590 switch(baseType) 591 { 592 case EbtInt: 593 case EbtUInt: 594 return sw::Shader::OPCODE_IADD; 595 case EbtFloat: 596 default: 597 return op; 598 } 599 case sw::Shader::OPCODE_SUB: 600 switch(baseType) 601 { 602 case EbtInt: 603 case EbtUInt: 604 return sw::Shader::OPCODE_ISUB; 605 case EbtFloat: 606 default: 607 return op; 608 } 609 case sw::Shader::OPCODE_MUL: 610 switch(baseType) 611 { 612 case EbtInt: 613 case EbtUInt: 614 return sw::Shader::OPCODE_IMUL; 615 case EbtFloat: 616 default: 617 return op; 618 } 619 case sw::Shader::OPCODE_DIV: 620 switch(baseType) 621 { 622 case EbtInt: 623 return sw::Shader::OPCODE_IDIV; 624 case EbtUInt: 625 return sw::Shader::OPCODE_UDIV; 626 case EbtFloat: 627 default: 628 return op; 629 } 630 case sw::Shader::OPCODE_IMOD: 631 return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op; 632 case sw::Shader::OPCODE_ISHR: 633 return baseType == EbtUInt ? 
sw::Shader::OPCODE_USHR : op; 634 case sw::Shader::OPCODE_MIN: 635 switch(baseType) 636 { 637 case EbtInt: 638 return sw::Shader::OPCODE_IMIN; 639 case EbtUInt: 640 return sw::Shader::OPCODE_UMIN; 641 case EbtFloat: 642 default: 643 return op; 644 } 645 case sw::Shader::OPCODE_MAX: 646 switch(baseType) 647 { 648 case EbtInt: 649 return sw::Shader::OPCODE_IMAX; 650 case EbtUInt: 651 return sw::Shader::OPCODE_UMAX; 652 case EbtFloat: 653 default: 654 return op; 655 } 656 default: 657 return op; 658 } 659 } 660 visitSymbol(TIntermSymbol * symbol)661 void OutputASM::visitSymbol(TIntermSymbol *symbol) 662 { 663 // The type of vertex outputs and fragment inputs with the same name must match (validated at link time), 664 // so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code). 665 switch(symbol->getQualifier()) 666 { 667 case EvqVaryingIn: 668 case EvqVaryingOut: 669 case EvqInvariantVaryingIn: 670 case EvqInvariantVaryingOut: 671 case EvqVertexOut: 672 case EvqFragmentIn: 673 if(symbol->getBasicType() != EbtInvariant) // Typeless declarations are not new varyings 674 { 675 declareVarying(symbol, -1); 676 } 677 break; 678 case EvqFragmentOut: 679 declareFragmentOutput(symbol); 680 break; 681 default: 682 break; 683 } 684 685 TInterfaceBlock* block = symbol->getType().getInterfaceBlock(); 686 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables: 687 // "All members of a named uniform block declared with a shared or std140 layout qualifier 688 // are considered active, even if they are not referenced in any shader in the program. 689 // The uniform block itself is also considered active, even if no member of the block is referenced." 
690 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140))) 691 { 692 uniformRegister(symbol); 693 } 694 } 695 visitBinary(Visit visit,TIntermBinary * node)696 bool OutputASM::visitBinary(Visit visit, TIntermBinary *node) 697 { 698 if(currentScope != emitScope) 699 { 700 return false; 701 } 702 703 TIntermTyped *result = node; 704 TIntermTyped *left = node->getLeft(); 705 TIntermTyped *right = node->getRight(); 706 const TType &leftType = left->getType(); 707 const TType &rightType = right->getType(); 708 709 if(isSamplerRegister(result)) 710 { 711 return false; // Don't traverse, the register index is determined statically 712 } 713 714 switch(node->getOp()) 715 { 716 case EOpAssign: 717 assert(visit == PreVisit); 718 right->traverse(this); 719 assignLvalue(left, right); 720 copy(result, right); 721 return false; 722 case EOpInitialize: 723 assert(visit == PreVisit); 724 // Constant arrays go into the constant register file. 725 if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1) 726 { 727 for(int i = 0; i < left->totalRegisterCount(); i++) 728 { 729 emit(sw::Shader::OPCODE_DEF, left, i, right, i); 730 } 731 } 732 else 733 { 734 right->traverse(this); 735 copy(left, right); 736 } 737 return false; 738 case EOpMatrixTimesScalarAssign: 739 assert(visit == PreVisit); 740 right->traverse(this); 741 for(int i = 0; i < leftType.getNominalSize(); i++) 742 { 743 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right); 744 } 745 746 assignLvalue(left, result); 747 return false; 748 case EOpVectorTimesMatrixAssign: 749 assert(visit == PreVisit); 750 { 751 // The left operand may contain a swizzle serving double-duty as 752 // swizzle and writemask, so it's important that we traverse it 753 // first. Otherwise we may end up never setting up our left 754 // operand correctly. 
755 left->traverse(this); 756 right->traverse(this); 757 int size = leftType.getNominalSize(); 758 759 for(int i = 0; i < size; i++) 760 { 761 Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i); 762 dot->dst.mask = 1 << i; 763 } 764 765 assignLvalue(left, result); 766 } 767 return false; 768 case EOpMatrixTimesMatrixAssign: 769 assert(visit == PreVisit); 770 { 771 right->traverse(this); 772 int dim = leftType.getNominalSize(); 773 774 for(int i = 0; i < dim; i++) 775 { 776 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 777 mul->src[1].swizzle = 0x00; 778 779 for(int j = 1; j < dim; j++) 780 { 781 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 782 mad->src[1].swizzle = j * 0x55; 783 } 784 } 785 786 assignLvalue(left, result); 787 } 788 return false; 789 case EOpIndexDirect: 790 case EOpIndexIndirect: 791 case EOpIndexDirectStruct: 792 case EOpIndexDirectInterfaceBlock: 793 assert(visit == PreVisit); 794 evaluateRvalue(node); 795 return false; 796 case EOpVectorSwizzle: 797 if(visit == PostVisit) 798 { 799 int swizzle = 0; 800 TIntermAggregate *components = right->getAsAggregate(); 801 802 if(components) 803 { 804 TIntermSequence &sequence = components->getSequence(); 805 int component = 0; 806 807 for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++) 808 { 809 TIntermConstantUnion *element = (*sit)->getAsConstantUnion(); 810 811 if(element) 812 { 813 int i = element->getUnionArrayPointer()[0].getIConst(); 814 swizzle |= i << (component * 2); 815 component++; 816 } 817 else UNREACHABLE(0); 818 } 819 } 820 else UNREACHABLE(0); 821 822 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left); 823 mov->src[0].swizzle = swizzle; 824 } 825 break; 826 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break; 827 case EOpAdd: if(visit == PostVisit) 
emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right); break; 828 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break; 829 case EOpSub: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right); break; 830 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break; 831 case EOpMul: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right); break; 832 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break; 833 case EOpDiv: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right); break; 834 case EOpIModAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break; 835 case EOpIMod: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right); break; 836 case EOpBitShiftLeftAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break; 837 case EOpBitShiftLeft: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right); break; 838 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break; 839 case EOpBitShiftRight: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right); break; 840 case EOpBitwiseAndAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break; 841 case EOpBitwiseAnd: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right); break; 842 case EOpBitwiseXorAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break; 843 
case EOpBitwiseXor: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right); break; 844 case EOpBitwiseOrAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right); break; 845 case EOpBitwiseOr: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right); break; 846 case EOpEqual: 847 if(visit == PostVisit) 848 { 849 emitBinary(sw::Shader::OPCODE_EQ, result, left, right); 850 851 for(int index = 1; index < left->totalRegisterCount(); index++) 852 { 853 Temporary equal(this); 854 emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index); 855 emit(sw::Shader::OPCODE_AND, result, result, &equal); 856 } 857 } 858 break; 859 case EOpNotEqual: 860 if(visit == PostVisit) 861 { 862 emitBinary(sw::Shader::OPCODE_NE, result, left, right); 863 864 for(int index = 1; index < left->totalRegisterCount(); index++) 865 { 866 Temporary notEqual(this); 867 emit(sw::Shader::OPCODE_NE, ¬Equal, 0, left, index, right, index); 868 emit(sw::Shader::OPCODE_OR, result, result, ¬Equal); 869 } 870 } 871 break; 872 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break; 873 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break; 874 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break; 875 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break; 876 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break; 877 case EOpVectorTimesScalar: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break; 878 case EOpMatrixTimesScalar: 879 if(visit == PostVisit) 880 { 881 if(left->isMatrix()) 882 { 883 for(int i = 0; i < leftType.getNominalSize(); i++) 884 { 885 emit(sw::Shader::OPCODE_MUL, result, i, 
left, i, right, 0); 886 } 887 } 888 else if(right->isMatrix()) 889 { 890 for(int i = 0; i < rightType.getNominalSize(); i++) 891 { 892 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 893 } 894 } 895 else UNREACHABLE(0); 896 } 897 break; 898 case EOpVectorTimesMatrix: 899 if(visit == PostVisit) 900 { 901 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize()); 902 903 int size = rightType.getNominalSize(); 904 for(int i = 0; i < size; i++) 905 { 906 Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i); 907 dot->dst.mask = 1 << i; 908 } 909 } 910 break; 911 case EOpMatrixTimesVector: 912 if(visit == PostVisit) 913 { 914 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right); 915 mul->src[1].swizzle = 0x00; 916 917 int size = rightType.getNominalSize(); 918 for(int i = 1; i < size; i++) 919 { 920 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result); 921 mad->src[1].swizzle = i * 0x55; 922 } 923 } 924 break; 925 case EOpMatrixTimesMatrix: 926 if(visit == PostVisit) 927 { 928 int dim = leftType.getNominalSize(); 929 930 int size = rightType.getNominalSize(); 931 for(int i = 0; i < size; i++) 932 { 933 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 934 mul->src[1].swizzle = 0x00; 935 936 for(int j = 1; j < dim; j++) 937 { 938 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 939 mad->src[1].swizzle = j * 0x55; 940 } 941 } 942 } 943 break; 944 case EOpLogicalOr: 945 if(trivial(right, 6)) 946 { 947 if(visit == PostVisit) 948 { 949 emit(sw::Shader::OPCODE_OR, result, left, right); 950 } 951 } 952 else // Short-circuit evaluation 953 { 954 if(visit == InVisit) 955 { 956 emit(sw::Shader::OPCODE_MOV, result, left); 957 Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result); 958 ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT; 959 } 960 else if(visit == PostVisit) 961 { 962 emit(sw::Shader::OPCODE_MOV, 
result, right); 963 emit(sw::Shader::OPCODE_ENDIF); 964 } 965 } 966 break; 967 case EOpLogicalXor: if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break; 968 case EOpLogicalAnd: 969 if(trivial(right, 6)) 970 { 971 if(visit == PostVisit) 972 { 973 emit(sw::Shader::OPCODE_AND, result, left, right); 974 } 975 } 976 else // Short-circuit evaluation 977 { 978 if(visit == InVisit) 979 { 980 emit(sw::Shader::OPCODE_MOV, result, left); 981 emit(sw::Shader::OPCODE_IF, 0, result); 982 } 983 else if(visit == PostVisit) 984 { 985 emit(sw::Shader::OPCODE_MOV, result, right); 986 emit(sw::Shader::OPCODE_ENDIF); 987 } 988 } 989 break; 990 default: UNREACHABLE(node->getOp()); 991 } 992 993 return true; 994 } 995 emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)996 void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow) 997 { 998 switch(size) 999 { 1000 case 1: // Used for cofactor computation only 1001 { 1002 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1003 bool isMov = (row == col); 1004 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG; 1005 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row); 1006 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col); 1007 mov->dst.mask = 1 << outRow; 1008 } 1009 break; 1010 case 2: 1011 { 1012 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy 1013 1014 bool isCofactor = (col >= 0) && (row >= 0); 1015 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 1016 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 1017 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 1018 1019 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1); 1020 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? 
row : 2]; 1021 det->dst.mask = 1 << outRow; 1022 } 1023 break; 1024 case 3: 1025 { 1026 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw 1027 1028 bool isCofactor = (col >= 0) && (row >= 0); 1029 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 1030 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 1031 int col2 = (isCofactor && (col <= 2)) ? 3 : 2; 1032 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 1033 1034 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2); 1035 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3]; 1036 det->dst.mask = 1 << outRow; 1037 } 1038 break; 1039 case 4: 1040 { 1041 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3); 1042 det->dst.mask = 1 << outRow; 1043 } 1044 break; 1045 default: 1046 UNREACHABLE(size); 1047 break; 1048 } 1049 } 1050 visitUnary(Visit visit,TIntermUnary * node)1051 bool OutputASM::visitUnary(Visit visit, TIntermUnary *node) 1052 { 1053 if(currentScope != emitScope) 1054 { 1055 return false; 1056 } 1057 1058 TIntermTyped *result = node; 1059 TIntermTyped *arg = node->getOperand(); 1060 TBasicType basicType = arg->getType().getBasicType(); 1061 1062 union 1063 { 1064 float f; 1065 int i; 1066 } one_value; 1067 1068 if(basicType == EbtInt || basicType == EbtUInt) 1069 { 1070 one_value.i = 1; 1071 } 1072 else 1073 { 1074 one_value.f = 1.0f; 1075 } 1076 1077 Constant one(one_value.f, one_value.f, one_value.f, one_value.f); 1078 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f); 1079 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f); 1080 1081 switch(node->getOp()) 1082 { 1083 case EOpNegative: 1084 if(visit == PostVisit) 1085 { 1086 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg); 1087 for(int index = 0; index < 
arg->totalRegisterCount(); index++) 1088 { 1089 emit(negOpcode, result, index, arg, index); 1090 } 1091 } 1092 break; 1093 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1094 case EOpLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1095 case EOpBitwiseNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1096 case EOpPostIncrement: 1097 if(visit == PostVisit) 1098 { 1099 copy(result, arg); 1100 1101 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 1102 for(int index = 0; index < arg->totalRegisterCount(); index++) 1103 { 1104 emit(addOpcode, arg, index, arg, index, &one); 1105 } 1106 1107 assignLvalue(arg, arg); 1108 } 1109 break; 1110 case EOpPostDecrement: 1111 if(visit == PostVisit) 1112 { 1113 copy(result, arg); 1114 1115 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 1116 for(int index = 0; index < arg->totalRegisterCount(); index++) 1117 { 1118 emit(subOpcode, arg, index, arg, index, &one); 1119 } 1120 1121 assignLvalue(arg, arg); 1122 } 1123 break; 1124 case EOpPreIncrement: 1125 if(visit == PostVisit) 1126 { 1127 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 1128 for(int index = 0; index < arg->totalRegisterCount(); index++) 1129 { 1130 emit(addOpcode, result, index, arg, index, &one); 1131 } 1132 1133 assignLvalue(arg, result); 1134 } 1135 break; 1136 case EOpPreDecrement: 1137 if(visit == PostVisit) 1138 { 1139 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 1140 for(int index = 0; index < arg->totalRegisterCount(); index++) 1141 { 1142 emit(subOpcode, result, index, arg, index, &one); 1143 } 1144 1145 assignLvalue(arg, result); 1146 } 1147 break; 1148 case EOpRadians: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break; 1149 case EOpDegrees: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, °); break; 1150 case 
EOpSin: if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break; 1151 case EOpCos: if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break; 1152 case EOpTan: if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break; 1153 case EOpAsin: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break; 1154 case EOpAcos: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break; 1155 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break; 1156 case EOpSinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break; 1157 case EOpCosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break; 1158 case EOpTanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break; 1159 case EOpAsinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break; 1160 case EOpAcosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break; 1161 case EOpAtanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break; 1162 case EOpExp: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break; 1163 case EOpLog: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break; 1164 case EOpExp2: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break; 1165 case EOpLog2: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break; 1166 case EOpSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break; 1167 case EOpInverseSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break; 1168 case EOpAbs: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break; 1169 case EOpSign: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break; 1170 case EOpFloor: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break; 1171 case 
EOpTrunc: if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break; 1172 case EOpRound: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break; 1173 case EOpRoundEven: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break; 1174 case EOpCeil: if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break; 1175 case EOpFract: if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break; 1176 case EOpIsNan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break; 1177 case EOpIsInf: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break; 1178 case EOpLength: if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break; 1179 case EOpNormalize: if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break; 1180 case EOpDFdx: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break; 1181 case EOpDFdy: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break; 1182 case EOpFwidth: if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break; 1183 case EOpAny: if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break; 1184 case EOpAll: if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break; 1185 case EOpFloatBitsToInt: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break; 1186 case EOpFloatBitsToUint: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break; 1187 case EOpIntBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break; 1188 case EOpUintBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break; 1189 case EOpPackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break; 1190 case EOpPackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, 
result, arg); break; 1191 case EOpPackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break; 1192 case EOpUnpackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break; 1193 case EOpUnpackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break; 1194 case EOpUnpackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break; 1195 case EOpTranspose: 1196 if(visit == PostVisit) 1197 { 1198 int numCols = arg->getNominalSize(); 1199 int numRows = arg->getSecondarySize(); 1200 for(int i = 0; i < numCols; ++i) 1201 { 1202 for(int j = 0; j < numRows; ++j) 1203 { 1204 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i); 1205 mov->src[0].swizzle = 0x55 * j; 1206 mov->dst.mask = 1 << i; 1207 } 1208 } 1209 } 1210 break; 1211 case EOpDeterminant: 1212 if(visit == PostVisit) 1213 { 1214 int size = arg->getNominalSize(); 1215 ASSERT(size == arg->getSecondarySize()); 1216 1217 emitDeterminant(result, arg, size); 1218 } 1219 break; 1220 case EOpInverse: 1221 if(visit == PostVisit) 1222 { 1223 int size = arg->getNominalSize(); 1224 ASSERT(size == arg->getSecondarySize()); 1225 1226 // Compute transposed matrix of cofactors 1227 for(int i = 0; i < size; ++i) 1228 { 1229 for(int j = 0; j < size; ++j) 1230 { 1231 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1232 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant 1233 emitDeterminant(result, arg, size - 1, j, i, i, j); 1234 } 1235 } 1236 1237 // Compute 1 / determinant 1238 Temporary invDet(this); 1239 emitDeterminant(&invDet, arg, size); 1240 Constant one(1.0f, 1.0f, 1.0f, 1.0f); 1241 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet); 1242 div->src[1].swizzle = 0x00; // xxxx 1243 1244 // Divide transposed matrix of cofactors by determinant 1245 for(int i = 0; i < size; ++i) 1246 { 1247 
emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet); 1248 } 1249 } 1250 break; 1251 default: UNREACHABLE(node->getOp()); 1252 } 1253 1254 return true; 1255 } 1256 visitAggregate(Visit visit,TIntermAggregate * node)1257 bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node) 1258 { 1259 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence) 1260 { 1261 return false; 1262 } 1263 1264 Constant zero(0.0f, 0.0f, 0.0f, 0.0f); 1265 1266 TIntermTyped *result = node; 1267 const TType &resultType = node->getType(); 1268 TIntermSequence &arg = node->getSequence(); 1269 int argumentCount = static_cast<int>(arg.size()); 1270 1271 switch(node->getOp()) 1272 { 1273 case EOpSequence: break; 1274 case EOpDeclaration: break; 1275 case EOpInvariantDeclaration: break; 1276 case EOpPrototype: break; 1277 case EOpComma: 1278 if(visit == PostVisit) 1279 { 1280 copy(result, arg[1]); 1281 } 1282 break; 1283 case EOpFunction: 1284 if(visit == PreVisit) 1285 { 1286 const TString &name = node->getName(); 1287 1288 if(emitScope == FUNCTION) 1289 { 1290 if(functionArray.size() > 1) // No need for a label when there's only main() 1291 { 1292 Instruction *label = emit(sw::Shader::OPCODE_LABEL); 1293 label->dst.type = sw::Shader::PARAMETER_LABEL; 1294 1295 const Function *function = findFunction(name); 1296 ASSERT(function); // Should have been added during global pass 1297 label->dst.index = function->label; 1298 currentFunction = function->label; 1299 } 1300 } 1301 else if(emitScope == GLOBAL) 1302 { 1303 if(name != "main(") 1304 { 1305 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence(); 1306 functionArray.push_back(Function(functionArray.size(), name, &arguments, node)); 1307 } 1308 } 1309 else UNREACHABLE(emitScope); 1310 1311 currentScope = FUNCTION; 1312 } 1313 else if(visit == PostVisit) 1314 { 1315 if(emitScope == FUNCTION) 1316 { 1317 if(functionArray.size() > 1) // No need to return when there's 
only main() 1318 { 1319 emit(sw::Shader::OPCODE_RET); 1320 } 1321 } 1322 1323 currentScope = GLOBAL; 1324 } 1325 break; 1326 case EOpFunctionCall: 1327 if(visit == PostVisit) 1328 { 1329 if(node->isUserDefined()) 1330 { 1331 const TString &name = node->getName(); 1332 const Function *function = findFunction(name); 1333 1334 if(!function) 1335 { 1336 mContext.error(node->getLine(), "function definition not found", name.c_str()); 1337 return false; 1338 } 1339 1340 TIntermSequence &arguments = *function->arg; 1341 1342 for(int i = 0; i < argumentCount; i++) 1343 { 1344 TIntermTyped *in = arguments[i]->getAsTyped(); 1345 1346 if(in->getQualifier() == EvqIn || 1347 in->getQualifier() == EvqInOut || 1348 in->getQualifier() == EvqConstReadOnly) 1349 { 1350 copy(in, arg[i]); 1351 } 1352 } 1353 1354 Instruction *call = emit(sw::Shader::OPCODE_CALL); 1355 call->dst.type = sw::Shader::PARAMETER_LABEL; 1356 call->dst.index = function->label; 1357 1358 if(function->ret && function->ret->getType().getBasicType() != EbtVoid) 1359 { 1360 copy(result, function->ret); 1361 } 1362 1363 for(int i = 0; i < argumentCount; i++) 1364 { 1365 TIntermTyped *argument = arguments[i]->getAsTyped(); 1366 TIntermTyped *out = arg[i]->getAsTyped(); 1367 1368 if(argument->getQualifier() == EvqOut || 1369 argument->getQualifier() == EvqInOut) 1370 { 1371 assignLvalue(out, argument); 1372 } 1373 } 1374 } 1375 else 1376 { 1377 const TextureFunction textureFunction(node->getName()); 1378 TIntermTyped *s = arg[0]->getAsTyped(); 1379 TIntermTyped *t = arg[1]->getAsTyped(); 1380 1381 Temporary coord(this); 1382 1383 if(textureFunction.proj) 1384 { 1385 Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]); 1386 rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1); 1387 rcp->dst.mask = 0x7; 1388 1389 Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord); 1390 mul->dst.mask = 0x7; 1391 1392 if(IsShadowSampler(s->getBasicType())) 1393 { 1394 ASSERT(s->getBasicType() == 
EbtSampler2DShadow); 1395 Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord); 1396 mov->src[0].swizzle = 0xA4; 1397 } 1398 } 1399 else 1400 { 1401 Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]); 1402 1403 if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3) 1404 { 1405 ASSERT(s->getBasicType() == EbtSampler2DShadow); 1406 mov->src[0].swizzle = 0xA4; 1407 } 1408 } 1409 1410 switch(textureFunction.method) 1411 { 1412 case TextureFunction::IMPLICIT: 1413 if(!textureFunction.offset) 1414 { 1415 if(argumentCount == 2) 1416 { 1417 emit(sw::Shader::OPCODE_TEX, result, &coord, s); 1418 } 1419 else if(argumentCount == 3) // Bias 1420 { 1421 emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]); 1422 } 1423 else UNREACHABLE(argumentCount); 1424 } 1425 else // Offset 1426 { 1427 if(argumentCount == 3) 1428 { 1429 emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]); 1430 } 1431 else if(argumentCount == 4) // Bias 1432 { 1433 emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]); 1434 } 1435 else UNREACHABLE(argumentCount); 1436 } 1437 break; 1438 case TextureFunction::LOD: 1439 if(!textureFunction.offset && argumentCount == 3) 1440 { 1441 emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]); 1442 } 1443 else if(argumentCount == 4) // Offset 1444 { 1445 emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]); 1446 } 1447 else UNREACHABLE(argumentCount); 1448 break; 1449 case TextureFunction::FETCH: 1450 if(!textureFunction.offset && argumentCount == 3) 1451 { 1452 emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]); 1453 } 1454 else if(argumentCount == 4) // Offset 1455 { 1456 emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]); 1457 } 1458 else UNREACHABLE(argumentCount); 1459 break; 1460 case TextureFunction::GRAD: 1461 if(!textureFunction.offset && argumentCount == 4) 1462 { 1463 emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], 
arg[3]); 1464 } 1465 else if(argumentCount == 5) // Offset 1466 { 1467 emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]); 1468 } 1469 else UNREACHABLE(argumentCount); 1470 break; 1471 case TextureFunction::SIZE: 1472 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s); 1473 break; 1474 default: 1475 UNREACHABLE(textureFunction.method); 1476 } 1477 } 1478 } 1479 break; 1480 case EOpParameters: 1481 break; 1482 case EOpConstructFloat: 1483 case EOpConstructVec2: 1484 case EOpConstructVec3: 1485 case EOpConstructVec4: 1486 case EOpConstructBool: 1487 case EOpConstructBVec2: 1488 case EOpConstructBVec3: 1489 case EOpConstructBVec4: 1490 case EOpConstructInt: 1491 case EOpConstructIVec2: 1492 case EOpConstructIVec3: 1493 case EOpConstructIVec4: 1494 case EOpConstructUInt: 1495 case EOpConstructUVec2: 1496 case EOpConstructUVec3: 1497 case EOpConstructUVec4: 1498 if(visit == PostVisit) 1499 { 1500 int component = 0; 1501 int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0; 1502 int arrayComponents = result->getType().getElementSize(); 1503 for(int i = 0; i < argumentCount; i++) 1504 { 1505 TIntermTyped *argi = arg[i]->getAsTyped(); 1506 int size = argi->getNominalSize(); 1507 int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex); 1508 int swizzle = component - (arrayIndex * arrayComponents); 1509 1510 if(!argi->isMatrix()) 1511 { 1512 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1513 mov->dst.mask = (0xF << swizzle) & 0xF; 1514 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1515 1516 component += size; 1517 } 1518 else if(!result->isMatrix()) // Construct a non matrix from a matrix 1519 { 1520 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1521 mov->dst.mask = (0xF << swizzle) & 0xF; 1522 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1523 1524 // At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3 1525 
if(result->getNominalSize() > size) 1526 { 1527 Instruction *mov = emitCast(result, arrayIndex, argi, 1); 1528 mov->dst.mask = (0xF << (swizzle + size)) & 0xF; 1529 // mat2: xxxy (0x40), mat3: xxxx (0x00) 1530 mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2); 1531 } 1532 1533 component += size; 1534 } 1535 else // Matrix 1536 { 1537 int column = 0; 1538 1539 while(component < resultType.getNominalSize()) 1540 { 1541 Instruction *mov = emitCast(result, arrayIndex, argi, column); 1542 mov->dst.mask = (0xF << swizzle) & 0xF; 1543 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1544 1545 column++; 1546 component += size; 1547 } 1548 } 1549 } 1550 } 1551 break; 1552 case EOpConstructMat2: 1553 case EOpConstructMat2x3: 1554 case EOpConstructMat2x4: 1555 case EOpConstructMat3x2: 1556 case EOpConstructMat3: 1557 case EOpConstructMat3x4: 1558 case EOpConstructMat4x2: 1559 case EOpConstructMat4x3: 1560 case EOpConstructMat4: 1561 if(visit == PostVisit) 1562 { 1563 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1564 const int outCols = result->getNominalSize(); 1565 const int outRows = result->getSecondarySize(); 1566 1567 if(arg0->isScalar() && arg.size() == 1) // Construct scale matrix 1568 { 1569 for(int i = 0; i < outCols; i++) 1570 { 1571 emit(sw::Shader::OPCODE_MOV, result, i, &zero); 1572 if (i < outRows) 1573 { 1574 // Insert the scalar value on the main diagonal. 1575 // For non-square matrices, Avoid emitting in 1576 // a column which doesn't /have/ a main diagonal 1577 // element, even though it would be fairly benign -- 1578 // it's not necessarily trivial for downstream 1579 // passes to see that this is redundant and strip it 1580 // out. 1581 Instruction *mov = emitCast(result, i, arg0, 0); 1582 mov->dst.mask = 1 << i; 1583 ASSERT(mov->src[0].swizzle == 0x00); 1584 } 1585 } 1586 } 1587 else if(arg0->isMatrix()) 1588 { 1589 int arraySize = result->isArray() ? 
result->getArraySize() : 1; 1590 1591 for(int n = 0; n < arraySize; n++) 1592 { 1593 TIntermTyped *argi = arg[n]->getAsTyped(); 1594 const int inCols = argi->getNominalSize(); 1595 const int inRows = argi->getSecondarySize(); 1596 1597 for(int i = 0; i < outCols; i++) 1598 { 1599 if(i >= inCols || outRows > inRows) 1600 { 1601 // Initialize to identity matrix 1602 Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f)); 1603 emitCast(result, i + n * outCols, &col, 0); 1604 } 1605 1606 if(i < inCols) 1607 { 1608 Instruction *mov = emitCast(result, i + n * outCols, argi, i); 1609 mov->dst.mask = 0xF >> (4 - inRows); 1610 } 1611 } 1612 } 1613 } 1614 else 1615 { 1616 int column = 0; 1617 int row = 0; 1618 1619 for(int i = 0; i < argumentCount; i++) 1620 { 1621 TIntermTyped *argi = arg[i]->getAsTyped(); 1622 int size = argi->getNominalSize(); 1623 int element = 0; 1624 1625 while(element < size) 1626 { 1627 Instruction *mov = emitCast(result, column, argi, 0); 1628 mov->dst.mask = (0xF << row) & 0xF; 1629 mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element; 1630 1631 int end = row + size - element; 1632 column = end >= outRows ? column + 1 : column; 1633 element = element + outRows - row; 1634 row = end >= outRows ? 
0 : end; 1635 } 1636 } 1637 } 1638 } 1639 break; 1640 case EOpConstructStruct: 1641 if(visit == PostVisit) 1642 { 1643 int offset = 0; 1644 for(int i = 0; i < argumentCount; i++) 1645 { 1646 TIntermTyped *argi = arg[i]->getAsTyped(); 1647 int size = argi->totalRegisterCount(); 1648 1649 for(int index = 0; index < size; index++) 1650 { 1651 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index); 1652 mov->dst.mask = writeMask(result, offset + index); 1653 } 1654 1655 offset += size; 1656 } 1657 } 1658 break; 1659 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break; 1660 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break; 1661 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break; 1662 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break; 1663 case EOpVectorEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break; 1664 case EOpVectorNotEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break; 1665 case EOpMod: if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break; 1666 case EOpModf: 1667 if(visit == PostVisit) 1668 { 1669 TIntermTyped* arg1 = arg[1]->getAsTyped(); 1670 emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]); 1671 assignLvalue(arg1, arg1); 1672 emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1); 1673 } 1674 break; 1675 case EOpPow: if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break; 1676 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break; 1677 case EOpMin: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break; 1678 case EOpMax: if(visit == PostVisit) 
emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break; 1679 case EOpClamp: 1680 if(visit == PostVisit) 1681 { 1682 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); 1683 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]); 1684 } 1685 break; 1686 case EOpMix: 1687 if(visit == PostVisit) 1688 { 1689 if(arg[2]->getAsTyped()->getBasicType() == EbtBool) 1690 { 1691 emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]); 1692 } 1693 else 1694 { 1695 emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); 1696 } 1697 } 1698 break; 1699 case EOpStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break; 1700 case EOpSmoothStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break; 1701 case EOpDistance: if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break; 1702 case EOpDot: if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break; 1703 case EOpCross: if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break; 1704 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1705 case EOpReflect: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break; 1706 case EOpRefract: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1707 case EOpMul: 1708 if(visit == PostVisit) 1709 { 1710 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1711 ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) && 1712 (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize())); 1713 1714 int size = arg0->getNominalSize(); 1715 for(int i = 0; i < size; i++) 1716 { 1717 emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i); 1718 } 1719 } 
1720 break; 1721 case EOpOuterProduct: 1722 if(visit == PostVisit) 1723 { 1724 for(int i = 0; i < dim(arg[1]); i++) 1725 { 1726 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]); 1727 mul->src[1].swizzle = 0x55 * i; 1728 } 1729 } 1730 break; 1731 default: UNREACHABLE(node->getOp()); 1732 } 1733 1734 return true; 1735 } 1736 visitSelection(Visit visit,TIntermSelection * node)1737 bool OutputASM::visitSelection(Visit visit, TIntermSelection *node) 1738 { 1739 if(currentScope != emitScope) 1740 { 1741 return false; 1742 } 1743 1744 TIntermTyped *condition = node->getCondition(); 1745 TIntermNode *trueBlock = node->getTrueBlock(); 1746 TIntermNode *falseBlock = node->getFalseBlock(); 1747 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 1748 1749 condition->traverse(this); 1750 1751 if(node->usesTernaryOperator()) 1752 { 1753 if(constantCondition) 1754 { 1755 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1756 1757 if(trueCondition) 1758 { 1759 trueBlock->traverse(this); 1760 copy(node, trueBlock); 1761 } 1762 else 1763 { 1764 falseBlock->traverse(this); 1765 copy(node, falseBlock); 1766 } 1767 } 1768 else if(trivial(node, 6)) // Fast to compute both potential results and no side effects 1769 { 1770 trueBlock->traverse(this); 1771 falseBlock->traverse(this); 1772 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock); 1773 } 1774 else 1775 { 1776 emit(sw::Shader::OPCODE_IF, 0, condition); 1777 1778 if(trueBlock) 1779 { 1780 trueBlock->traverse(this); 1781 copy(node, trueBlock); 1782 } 1783 1784 if(falseBlock) 1785 { 1786 emit(sw::Shader::OPCODE_ELSE); 1787 falseBlock->traverse(this); 1788 copy(node, falseBlock); 1789 } 1790 1791 emit(sw::Shader::OPCODE_ENDIF); 1792 } 1793 } 1794 else // if/else statement 1795 { 1796 if(constantCondition) 1797 { 1798 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1799 1800 if(trueCondition) 1801 { 1802 
if(trueBlock) 1803 { 1804 trueBlock->traverse(this); 1805 } 1806 } 1807 else 1808 { 1809 if(falseBlock) 1810 { 1811 falseBlock->traverse(this); 1812 } 1813 } 1814 } 1815 else 1816 { 1817 emit(sw::Shader::OPCODE_IF, 0, condition); 1818 1819 if(trueBlock) 1820 { 1821 trueBlock->traverse(this); 1822 } 1823 1824 if(falseBlock) 1825 { 1826 emit(sw::Shader::OPCODE_ELSE); 1827 falseBlock->traverse(this); 1828 } 1829 1830 emit(sw::Shader::OPCODE_ENDIF); 1831 } 1832 } 1833 1834 return false; 1835 } 1836 visitLoop(Visit visit,TIntermLoop * node)1837 bool OutputASM::visitLoop(Visit visit, TIntermLoop *node) 1838 { 1839 if(currentScope != emitScope) 1840 { 1841 return false; 1842 } 1843 1844 LoopInfo loop(node); 1845 1846 if(loop.iterations == 0) 1847 { 1848 return false; 1849 } 1850 1851 bool unroll = (loop.iterations <= 4); 1852 1853 TIntermNode *init = node->getInit(); 1854 TIntermTyped *condition = node->getCondition(); 1855 TIntermTyped *expression = node->getExpression(); 1856 TIntermNode *body = node->getBody(); 1857 Constant True(true); 1858 1859 if(loop.isDeterministic()) 1860 { 1861 deterministicVariables.insert(loop.index->getId()); 1862 1863 if(!unroll) 1864 { 1865 emit(sw::Shader::OPCODE_SCALAR); // Unrolled loops don't have an ENDWHILE to disable scalar mode. 
1866 } 1867 } 1868 1869 if(node->getType() == ELoopDoWhile) 1870 { 1871 Temporary iterate(this); 1872 emit(sw::Shader::OPCODE_MOV, &iterate, &True); 1873 1874 emit(sw::Shader::OPCODE_WHILE, 0, &iterate); // FIXME: Implement real do-while 1875 1876 if(body) 1877 { 1878 body->traverse(this); 1879 } 1880 1881 emit(sw::Shader::OPCODE_TEST); 1882 1883 condition->traverse(this); 1884 emit(sw::Shader::OPCODE_MOV, &iterate, condition); 1885 1886 emit(sw::Shader::OPCODE_ENDWHILE); 1887 } 1888 else 1889 { 1890 if(init) 1891 { 1892 init->traverse(this); 1893 } 1894 1895 if(unroll) 1896 { 1897 mContext.info(node->getLine(), "loop unrolled", "for"); 1898 1899 for(unsigned int i = 0; i < loop.iterations; i++) 1900 { 1901 // condition->traverse(this); // Condition could contain statements, but not in an unrollable loop 1902 1903 if(body) 1904 { 1905 body->traverse(this); 1906 } 1907 1908 if(expression) 1909 { 1910 expression->traverse(this); 1911 } 1912 } 1913 } 1914 else 1915 { 1916 if(condition) 1917 { 1918 condition->traverse(this); 1919 } 1920 else 1921 { 1922 condition = &True; 1923 } 1924 1925 emit(sw::Shader::OPCODE_WHILE, 0, condition); 1926 1927 if(body) 1928 { 1929 body->traverse(this); 1930 } 1931 1932 emit(sw::Shader::OPCODE_TEST); 1933 1934 if(loop.isDeterministic()) 1935 { 1936 emit(sw::Shader::OPCODE_SCALAR); 1937 } 1938 1939 if(expression) 1940 { 1941 expression->traverse(this); 1942 } 1943 1944 if(condition) 1945 { 1946 condition->traverse(this); 1947 } 1948 1949 emit(sw::Shader::OPCODE_ENDWHILE); 1950 } 1951 } 1952 1953 if(loop.isDeterministic()) 1954 { 1955 deterministicVariables.erase(loop.index->getId()); 1956 } 1957 1958 return false; 1959 } 1960 visitBranch(Visit visit,TIntermBranch * node)1961 bool OutputASM::visitBranch(Visit visit, TIntermBranch *node) 1962 { 1963 if(currentScope != emitScope) 1964 { 1965 return false; 1966 } 1967 1968 switch(node->getFlowOp()) 1969 { 1970 case EOpKill: if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD); break; 1971 
case EOpBreak: if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK); break; 1972 case EOpContinue: if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break; 1973 case EOpReturn: 1974 if(visit == PostVisit) 1975 { 1976 TIntermTyped *value = node->getExpression(); 1977 1978 if(value) 1979 { 1980 copy(functionArray[currentFunction].ret, value); 1981 } 1982 1983 emit(sw::Shader::OPCODE_LEAVE); 1984 } 1985 break; 1986 default: UNREACHABLE(node->getFlowOp()); 1987 } 1988 1989 return true; 1990 } 1991 visitSwitch(Visit visit,TIntermSwitch * node)1992 bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node) 1993 { 1994 if(currentScope != emitScope) 1995 { 1996 return false; 1997 } 1998 1999 TIntermTyped* switchValue = node->getInit(); 2000 TIntermAggregate* opList = node->getStatementList(); 2001 2002 if(!switchValue || !opList) 2003 { 2004 return false; 2005 } 2006 2007 switchValue->traverse(this); 2008 2009 emit(sw::Shader::OPCODE_SWITCH); 2010 2011 TIntermSequence& sequence = opList->getSequence(); 2012 TIntermSequence::iterator it = sequence.begin(); 2013 TIntermSequence::iterator defaultIt = sequence.end(); 2014 int nbCases = 0; 2015 for(; it != sequence.end(); ++it) 2016 { 2017 TIntermCase* currentCase = (*it)->getAsCaseNode(); 2018 if(currentCase) 2019 { 2020 TIntermSequence::iterator caseIt = it; 2021 2022 TIntermTyped* condition = currentCase->getCondition(); 2023 if(condition) // non default case 2024 { 2025 if(nbCases != 0) 2026 { 2027 emit(sw::Shader::OPCODE_ELSE); 2028 } 2029 2030 condition->traverse(this); 2031 Temporary result(this); 2032 emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition); 2033 emit(sw::Shader::OPCODE_IF, 0, &result); 2034 nbCases++; 2035 2036 // Emit the code for this case and all subsequent cases until we hit a break statement. 2037 // TODO: This can repeat a lot of code for switches with many fall-through cases. 
2038 for(++caseIt; caseIt != sequence.end(); ++caseIt) 2039 { 2040 (*caseIt)->traverse(this); 2041 2042 // Stop if we encounter an unconditional branch (break, continue, return, or kill). 2043 // TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}). 2044 // Note that this eliminates useless operations but shouldn't affect correctness. 2045 if((*caseIt)->getAsBranchNode()) 2046 { 2047 break; 2048 } 2049 } 2050 } 2051 else 2052 { 2053 defaultIt = it; // The default case might not be the last case, keep it for last 2054 } 2055 } 2056 } 2057 2058 // If there's a default case, traverse it here 2059 if(defaultIt != sequence.end()) 2060 { 2061 if(nbCases != 0) 2062 { 2063 emit(sw::Shader::OPCODE_ELSE); 2064 } 2065 2066 for(++defaultIt; defaultIt != sequence.end(); ++defaultIt) 2067 { 2068 (*defaultIt)->traverse(this); 2069 if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return 2070 { 2071 break; 2072 } 2073 } 2074 } 2075 2076 for(int i = 0; i < nbCases; ++i) 2077 { 2078 emit(sw::Shader::OPCODE_ENDIF); 2079 } 2080 2081 emit(sw::Shader::OPCODE_ENDSWITCH); 2082 2083 return false; 2084 } 2085 emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)2086 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4) 2087 { 2088 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0); 2089 } 2090 emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2091 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1, 2092 TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, 
int index4) 2093 { 2094 Instruction *instruction = new Instruction(op); 2095 2096 if(dst) 2097 { 2098 destination(instruction->dst, dst, dstIndex); 2099 } 2100 2101 if(src0) 2102 { 2103 TIntermTyped* src = src0->getAsTyped(); 2104 instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow); 2105 } 2106 2107 source(instruction->src[0], src0, index0); 2108 source(instruction->src[1], src1, index1); 2109 source(instruction->src[2], src2, index2); 2110 source(instruction->src[3], src3, index3); 2111 source(instruction->src[4], src4, index4); 2112 2113 shader->append(instruction); 2114 2115 return instruction; 2116 } 2117 emitCast(TIntermTyped * dst,TIntermTyped * src)2118 Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src) 2119 { 2120 return emitCast(dst, 0, src, 0); 2121 } 2122 emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2123 Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex) 2124 { 2125 switch(src->getBasicType()) 2126 { 2127 case EbtBool: 2128 switch(dst->getBasicType()) 2129 { 2130 case EbtInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 2131 case EbtUInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 2132 case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex); 2133 default: break; 2134 } 2135 break; 2136 case EbtInt: 2137 switch(dst->getBasicType()) 2138 { 2139 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 2140 case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex); 2141 default: break; 2142 } 2143 break; 2144 case EbtUInt: 2145 switch(dst->getBasicType()) 2146 { 2147 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 2148 case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex); 2149 default: break; 2150 } 2151 break; 2152 case EbtFloat: 2153 switch(dst->getBasicType()) 
2154 { 2155 case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex); 2156 case EbtInt: return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex); 2157 case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex); 2158 default: break; 2159 } 2160 break; 2161 default: 2162 break; 2163 } 2164 2165 ASSERT((src->getBasicType() == dst->getBasicType()) || 2166 ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) || 2167 ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt))); 2168 2169 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex); 2170 } 2171 emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2172 void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2) 2173 { 2174 for(int index = 0; index < dst->elementRegisterCount(); index++) 2175 { 2176 emit(op, dst, index, src0, index, src1, index, src2, index); 2177 } 2178 } 2179 emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)2180 void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1) 2181 { 2182 emitBinary(op, result, src0, src1); 2183 assignLvalue(lhs, result); 2184 } 2185 emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2186 void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index) 2187 { 2188 sw::Shader::Opcode opcode; 2189 switch(left->getAsTyped()->getBasicType()) 2190 { 2191 case EbtBool: 2192 case EbtInt: 2193 opcode = sw::Shader::OPCODE_ICMP; 2194 break; 2195 case EbtUInt: 2196 opcode = sw::Shader::OPCODE_UCMP; 2197 break; 2198 default: 2199 opcode = sw::Shader::OPCODE_CMP; 2200 break; 2201 } 2202 2203 Instruction *cmp = emit(opcode, dst, 0, 
left, index, right, index); 2204 cmp->control = cmpOp; 2205 } 2206 componentCount(const TType & type,int registers)2207 int componentCount(const TType &type, int registers) 2208 { 2209 if(registers == 0) 2210 { 2211 return 0; 2212 } 2213 2214 if(type.isArray() && registers >= type.elementRegisterCount()) 2215 { 2216 int index = registers / type.elementRegisterCount(); 2217 registers -= index * type.elementRegisterCount(); 2218 return index * type.getElementSize() + componentCount(type, registers); 2219 } 2220 2221 if(type.isStruct() || type.isInterfaceBlock()) 2222 { 2223 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2224 int elements = 0; 2225 2226 for(const auto &field : fields) 2227 { 2228 const TType &fieldType = *(field->type()); 2229 2230 if(fieldType.totalRegisterCount() <= registers) 2231 { 2232 registers -= fieldType.totalRegisterCount(); 2233 elements += fieldType.getObjectSize(); 2234 } 2235 else // Register within this field 2236 { 2237 return elements + componentCount(fieldType, registers); 2238 } 2239 } 2240 } 2241 else if(type.isMatrix()) 2242 { 2243 return registers * type.registerSize(); 2244 } 2245 2246 UNREACHABLE(0); 2247 return 0; 2248 } 2249 registerSize(const TType & type,int registers)2250 int registerSize(const TType &type, int registers) 2251 { 2252 if(registers == 0) 2253 { 2254 if(type.isStruct()) 2255 { 2256 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0); 2257 } 2258 else if(type.isInterfaceBlock()) 2259 { 2260 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0); 2261 } 2262 2263 return type.registerSize(); 2264 } 2265 2266 if(type.isArray() && registers >= type.elementRegisterCount()) 2267 { 2268 int index = registers / type.elementRegisterCount(); 2269 registers -= index * type.elementRegisterCount(); 2270 return registerSize(type, registers); 2271 } 2272 2273 if(type.isStruct() || type.isInterfaceBlock()) 2274 { 
2275 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2276 int elements = 0; 2277 2278 for(const auto &field : fields) 2279 { 2280 const TType &fieldType = *(field->type()); 2281 2282 if(fieldType.totalRegisterCount() <= registers) 2283 { 2284 registers -= fieldType.totalRegisterCount(); 2285 elements += fieldType.getObjectSize(); 2286 } 2287 else // Register within this field 2288 { 2289 return registerSize(fieldType, registers); 2290 } 2291 } 2292 } 2293 else if(type.isMatrix()) 2294 { 2295 return registerSize(type, 0); 2296 } 2297 2298 UNREACHABLE(0); 2299 return 0; 2300 } 2301 getBlockId(TIntermTyped * arg)2302 int OutputASM::getBlockId(TIntermTyped *arg) 2303 { 2304 if(arg) 2305 { 2306 const TType &type = arg->getType(); 2307 TInterfaceBlock* block = type.getInterfaceBlock(); 2308 if(block && (type.getQualifier() == EvqUniform)) 2309 { 2310 // Make sure the uniform block is declared 2311 uniformRegister(arg); 2312 2313 const char* blockName = block->name().c_str(); 2314 2315 // Fetch uniform block index from array of blocks 2316 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it) 2317 { 2318 if(blockName == it->name) 2319 { 2320 return it->blockId; 2321 } 2322 } 2323 2324 ASSERT(false); 2325 } 2326 } 2327 2328 return -1; 2329 } 2330 getArgumentInfo(TIntermTyped * arg,int index)2331 OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index) 2332 { 2333 const TType &type = arg->getType(); 2334 int blockId = getBlockId(arg); 2335 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1); 2336 if(blockId != -1) 2337 { 2338 argumentInfo.bufferIndex = 0; 2339 for(int i = 0; i < blockId; ++i) 2340 { 2341 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize; 2342 argumentInfo.bufferIndex += blockArraySize > 0 ? 
blockArraySize : 1; 2343 } 2344 2345 const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId]; 2346 2347 BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end(); 2348 BlockDefinitionIndexMap::const_iterator it = itEnd; 2349 2350 argumentInfo.clampedIndex = index; 2351 if(type.isInterfaceBlock()) 2352 { 2353 // Offset index to the beginning of the selected instance 2354 int blockRegisters = type.elementRegisterCount(); 2355 int bufferOffset = argumentInfo.clampedIndex / blockRegisters; 2356 argumentInfo.bufferIndex += bufferOffset; 2357 argumentInfo.clampedIndex -= bufferOffset * blockRegisters; 2358 } 2359 2360 int regIndex = registerIndex(arg); 2361 for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i) 2362 { 2363 it = blockDefinition.find(i); 2364 if(it != itEnd) 2365 { 2366 argumentInfo.clampedIndex -= (i - regIndex); 2367 break; 2368 } 2369 } 2370 ASSERT(it != itEnd); 2371 2372 argumentInfo.typedMemberInfo = it->second; 2373 2374 int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount(); 2375 argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex; 2376 } 2377 else 2378 { 2379 argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? 
arg->totalRegisterCount() - 1 : index; 2380 } 2381 2382 return argumentInfo; 2383 } 2384 source(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2385 void OutputASM::source(sw::Shader::SourceParameter ¶meter, TIntermNode *argument, int index) 2386 { 2387 if(argument) 2388 { 2389 TIntermTyped *arg = argument->getAsTyped(); 2390 Temporary unpackedUniform(this); 2391 2392 const TType& srcType = arg->getType(); 2393 TInterfaceBlock* srcBlock = srcType.getInterfaceBlock(); 2394 if(srcBlock && (srcType.getQualifier() == EvqUniform)) 2395 { 2396 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2397 const TType &memberType = argumentInfo.typedMemberInfo.type; 2398 2399 if(memberType.getBasicType() == EbtBool) 2400 { 2401 ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize 2402 2403 // Convert the packed bool, which is currently an int, to a true bool 2404 Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B); 2405 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2406 instruction->dst.index = registerIndex(&unpackedUniform); 2407 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2408 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2409 instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride; 2410 2411 shader->append(instruction); 2412 2413 arg = &unpackedUniform; 2414 index = 0; 2415 } 2416 else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix()) 2417 { 2418 int numCols = memberType.getNominalSize(); 2419 int numRows = memberType.getSecondarySize(); 2420 2421 ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? 
memberType.getArraySize() : 1))); // index < cols * arraySize 2422 2423 unsigned int dstIndex = registerIndex(&unpackedUniform); 2424 unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55; 2425 int arrayIndex = argumentInfo.clampedIndex / numCols; 2426 int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride; 2427 2428 for(int j = 0; j < numRows; ++j) 2429 { 2430 // Transpose the row major matrix 2431 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV); 2432 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2433 instruction->dst.index = dstIndex; 2434 instruction->dst.mask = 1 << j; 2435 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2436 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2437 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride; 2438 instruction->src[0].swizzle = srcSwizzle; 2439 2440 shader->append(instruction); 2441 } 2442 2443 arg = &unpackedUniform; 2444 index = 0; 2445 } 2446 } 2447 2448 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2449 const TType &type = argumentInfo.typedMemberInfo.type; 2450 2451 int size = registerSize(type, argumentInfo.clampedIndex); 2452 2453 parameter.type = registerType(arg); 2454 parameter.bufferIndex = argumentInfo.bufferIndex; 2455 2456 if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer()) 2457 { 2458 int component = componentCount(type, argumentInfo.clampedIndex); 2459 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer(); 2460 2461 for(int i = 0; i < 4; i++) 2462 { 2463 if(size == 1) // Replicate 2464 { 2465 parameter.value[i] = constants[component + 0].getAsFloat(); 2466 } 2467 else if(i < size) 2468 { 2469 parameter.value[i] = constants[component + i].getAsFloat(); 2470 } 2471 else 2472 { 2473 parameter.value[i] = 0.0f; 2474 } 2475 } 2476 } 2477 else 2478 { 2479 parameter.index = 
registerIndex(arg) + argumentInfo.clampedIndex; 2480 2481 if(parameter.bufferIndex != -1) 2482 { 2483 int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride; 2484 parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride; 2485 } 2486 2487 if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS) 2488 { 2489 mContext.error(arg->getLine(), 2490 "Too many temporary registers required to compile shader", 2491 pixelShader ? "pixel shader" : "vertex shader"); 2492 } 2493 } 2494 2495 if(!IsSampler(arg->getBasicType())) 2496 { 2497 parameter.swizzle = readSwizzle(arg, size); 2498 } 2499 } 2500 } 2501 destination(sw::Shader::DestinationParameter & parameter,TIntermTyped * arg,int index)2502 void OutputASM::destination(sw::Shader::DestinationParameter ¶meter, TIntermTyped *arg, int index) 2503 { 2504 parameter.type = registerType(arg); 2505 parameter.index = registerIndex(arg) + index; 2506 parameter.mask = writeMask(arg, index); 2507 2508 if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS) 2509 { 2510 mContext.error(arg->getLine(), 2511 "Too many temporary registers required to compile shader", 2512 pixelShader ? 
// Extracts the 2-bit component selector stored at position 'index' of a packed
// swizzle (position 0 occupies the two least significant bits).
int swizzleElement(int swizzle, int index)
{
	const int shift = 2 * index;

	return (swizzle >> shift) & 0x03;
}

// Composes two packed swizzles: component i of the result is the component of
// 'leftSwizzle' selected by component i of 'rightSwizzle'.
int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
{
	int combined = 0;

	for(int slot = 0; slot < 4; ++slot)
	{
		const int selected = swizzleElement(rightSwizzle, slot);

		combined |= swizzleElement(leftSwizzle, selected) << (slot * 2);
	}

	return combined;
}
"vector" : "matrix"); 2544 } 2545 2546 TIntermBinary *binary = dst->getAsBinaryNode(); 2547 2548 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar()) 2549 { 2550 Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT); 2551 2552 lvalue(insert->dst, dst); 2553 2554 insert->src[0].type = insert->dst.type; 2555 insert->src[0].index = insert->dst.index; 2556 insert->src[0].rel = insert->dst.rel; 2557 source(insert->src[1], src); 2558 source(insert->src[2], binary->getRight()); 2559 2560 shader->append(insert); 2561 } 2562 else 2563 { 2564 Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV); 2565 2566 int swizzle = lvalue(mov1->dst, dst); 2567 2568 source(mov1->src[0], src); 2569 mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle); 2570 2571 shader->append(mov1); 2572 2573 for(int offset = 1; offset < dst->totalRegisterCount(); offset++) 2574 { 2575 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV); 2576 2577 mov->dst = mov1->dst; 2578 mov->dst.index += offset; 2579 mov->dst.mask = writeMask(dst, offset); 2580 2581 source(mov->src[0], src, offset); 2582 2583 shader->append(mov); 2584 } 2585 } 2586 } 2587 evaluateRvalue(TIntermTyped * node)2588 void OutputASM::evaluateRvalue(TIntermTyped *node) 2589 { 2590 TIntermBinary *binary = node->getAsBinaryNode(); 2591 2592 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar()) 2593 { 2594 Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT); 2595 2596 destination(insert->dst, node); 2597 2598 Temporary address(this); 2599 unsigned char mask; 2600 TIntermTyped *root = nullptr; 2601 unsigned int offset = 0; 2602 int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node); 2603 2604 source(insert->src[0], root, offset); 2605 insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle); 2606 2607 source(insert->src[1], binary->getRight()); 2608 2609 
shader->append(insert); 2610 } 2611 else 2612 { 2613 Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV); 2614 2615 destination(mov1->dst, node, 0); 2616 2617 Temporary address(this); 2618 unsigned char mask; 2619 TIntermTyped *root = nullptr; 2620 unsigned int offset = 0; 2621 int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node); 2622 2623 source(mov1->src[0], root, offset); 2624 mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle); 2625 2626 shader->append(mov1); 2627 2628 for(int i = 1; i < node->totalRegisterCount(); i++) 2629 { 2630 Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i); 2631 mov->src[0].rel = mov1->src[0].rel; 2632 } 2633 } 2634 } 2635 lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2636 int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node) 2637 { 2638 Temporary address(this); 2639 TIntermTyped *root = nullptr; 2640 unsigned int offset = 0; 2641 unsigned char mask = 0xF; 2642 int swizzle = lvalue(root, offset, dst.rel, mask, address, node); 2643 2644 dst.type = registerType(root); 2645 dst.index = registerIndex(root) + offset; 2646 dst.mask = mask; 2647 2648 return swizzle; 2649 } 2650 lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2651 int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node) 2652 { 2653 TIntermTyped *result = node; 2654 TIntermBinary *binary = node->getAsBinaryNode(); 2655 TIntermSymbol *symbol = node->getAsSymbolNode(); 2656 2657 if(binary) 2658 { 2659 TIntermTyped *left = binary->getLeft(); 2660 TIntermTyped *right = binary->getRight(); 2661 2662 int leftSwizzle = lvalue(root, offset, rel, mask, address, left); // Resolve the l-value of the left side 2663 2664 switch(binary->getOp()) 2665 { 2666 case EOpIndexDirect: 2667 { 2668 
int rightIndex = right->getAsConstantUnion()->getIConst(0); 2669 2670 if(left->isRegister()) 2671 { 2672 int leftMask = mask; 2673 2674 mask = 1; 2675 while((leftMask & mask) == 0) 2676 { 2677 mask = mask << 1; 2678 } 2679 2680 int element = swizzleElement(leftSwizzle, rightIndex); 2681 mask = 1 << element; 2682 2683 return element; 2684 } 2685 else if(left->isArray() || left->isMatrix()) 2686 { 2687 offset += rightIndex * result->totalRegisterCount(); 2688 return 0xE4; 2689 } 2690 else UNREACHABLE(0); 2691 } 2692 break; 2693 case EOpIndexIndirect: 2694 { 2695 right->traverse(this); 2696 2697 if(left->isRegister()) 2698 { 2699 // Requires INSERT instruction (handled by calling function) 2700 } 2701 else if(left->isArray() || left->isMatrix()) 2702 { 2703 int scale = result->totalRegisterCount(); 2704 2705 if(rel.type == sw::Shader::PARAMETER_VOID) // Use the index register as the relative address directly 2706 { 2707 if(left->totalRegisterCount() > 1) 2708 { 2709 sw::Shader::SourceParameter relativeRegister; 2710 source(relativeRegister, right); 2711 2712 int indexId = right->getAsSymbolNode() ? 
right->getAsSymbolNode()->getId() : 0; 2713 2714 rel.index = relativeRegister.index; 2715 rel.type = relativeRegister.type; 2716 rel.scale = scale; 2717 rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0); 2718 } 2719 } 2720 else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register 2721 { 2722 if(scale == 1) 2723 { 2724 Constant oldScale((int)rel.scale); 2725 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right); 2726 mad->src[0].index = rel.index; 2727 mad->src[0].type = rel.type; 2728 } 2729 else 2730 { 2731 Constant oldScale((int)rel.scale); 2732 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale); 2733 mul->src[0].index = rel.index; 2734 mul->src[0].type = rel.type; 2735 2736 Constant newScale(scale); 2737 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2738 } 2739 2740 rel.type = sw::Shader::PARAMETER_TEMP; 2741 rel.index = registerIndex(&address); 2742 rel.scale = 1; 2743 } 2744 else // Just add the new index to the address register 2745 { 2746 if(scale == 1) 2747 { 2748 emit(sw::Shader::OPCODE_IADD, &address, &address, right); 2749 } 2750 else 2751 { 2752 Constant newScale(scale); 2753 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2754 } 2755 } 2756 } 2757 else UNREACHABLE(0); 2758 } 2759 break; 2760 case EOpIndexDirectStruct: 2761 case EOpIndexDirectInterfaceBlock: 2762 { 2763 const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ? 
2764 left->getType().getStruct()->fields() : 2765 left->getType().getInterfaceBlock()->fields(); 2766 int index = right->getAsConstantUnion()->getIConst(0); 2767 int fieldOffset = 0; 2768 2769 for(int i = 0; i < index; i++) 2770 { 2771 fieldOffset += fields[i]->type()->totalRegisterCount(); 2772 } 2773 2774 offset += fieldOffset; 2775 mask = writeMask(result); 2776 2777 return 0xE4; 2778 } 2779 break; 2780 case EOpVectorSwizzle: 2781 { 2782 ASSERT(left->isRegister()); 2783 2784 int leftMask = mask; 2785 2786 int swizzle = 0; 2787 int rightMask = 0; 2788 2789 TIntermSequence &sequence = right->getAsAggregate()->getSequence(); 2790 2791 for(unsigned int i = 0; i < sequence.size(); i++) 2792 { 2793 int index = sequence[i]->getAsConstantUnion()->getIConst(0); 2794 2795 int element = swizzleElement(leftSwizzle, index); 2796 rightMask = rightMask | (1 << element); 2797 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2); 2798 } 2799 2800 mask = leftMask & rightMask; 2801 2802 return swizzle; 2803 } 2804 break; 2805 default: 2806 UNREACHABLE(binary->getOp()); // Not an l-value operator 2807 break; 2808 } 2809 } 2810 else if(symbol) 2811 { 2812 root = symbol; 2813 offset = 0; 2814 mask = writeMask(symbol); 2815 2816 return 0xE4; 2817 } 2818 else 2819 { 2820 node->traverse(this); 2821 2822 root = node; 2823 offset = 0; 2824 mask = writeMask(node); 2825 2826 return 0xE4; 2827 } 2828 2829 return 0xE4; 2830 } 2831 registerType(TIntermTyped * operand)2832 sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand) 2833 { 2834 if(isSamplerRegister(operand)) 2835 { 2836 return sw::Shader::PARAMETER_SAMPLER; 2837 } 2838 2839 const TQualifier qualifier = operand->getQualifier(); 2840 if((qualifier == EvqFragColor) || (qualifier == EvqFragData)) 2841 { 2842 if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) || 2843 ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData))) 2844 { 2845 mContext.error(operand->getLine(), "static 
assignment to both gl_FragData and gl_FragColor", ""); 2846 } 2847 outputQualifier = qualifier; 2848 } 2849 2850 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer())) 2851 { 2852 // Constant arrays are in the constant register file. 2853 if(operand->isArray() && operand->getArraySize() > 1) 2854 { 2855 return sw::Shader::PARAMETER_CONST; 2856 } 2857 else 2858 { 2859 return sw::Shader::PARAMETER_TEMP; 2860 } 2861 } 2862 2863 switch(qualifier) 2864 { 2865 case EvqTemporary: return sw::Shader::PARAMETER_TEMP; 2866 case EvqGlobal: return sw::Shader::PARAMETER_TEMP; 2867 case EvqConstExpr: return sw::Shader::PARAMETER_FLOAT4LITERAL; // All converted to float 2868 case EvqAttribute: return sw::Shader::PARAMETER_INPUT; 2869 case EvqVaryingIn: return sw::Shader::PARAMETER_INPUT; 2870 case EvqVaryingOut: return sw::Shader::PARAMETER_OUTPUT; 2871 case EvqVertexIn: return sw::Shader::PARAMETER_INPUT; 2872 case EvqFragmentOut: return sw::Shader::PARAMETER_COLOROUT; 2873 case EvqVertexOut: return sw::Shader::PARAMETER_OUTPUT; 2874 case EvqFragmentIn: return sw::Shader::PARAMETER_INPUT; 2875 case EvqInvariantVaryingIn: return sw::Shader::PARAMETER_INPUT; // FIXME: Guarantee invariance at the backend 2876 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT; // FIXME: Guarantee invariance at the backend 2877 case EvqSmooth: return sw::Shader::PARAMETER_OUTPUT; 2878 case EvqFlat: return sw::Shader::PARAMETER_OUTPUT; 2879 case EvqCentroidOut: return sw::Shader::PARAMETER_OUTPUT; 2880 case EvqSmoothIn: return sw::Shader::PARAMETER_INPUT; 2881 case EvqFlatIn: return sw::Shader::PARAMETER_INPUT; 2882 case EvqCentroidIn: return sw::Shader::PARAMETER_INPUT; 2883 case EvqUniform: return sw::Shader::PARAMETER_CONST; 2884 case EvqIn: return sw::Shader::PARAMETER_TEMP; 2885 case EvqOut: return sw::Shader::PARAMETER_TEMP; 2886 case EvqInOut: return sw::Shader::PARAMETER_TEMP; 2887 case EvqConstReadOnly: return 
sw::Shader::PARAMETER_TEMP; 2888 case EvqPosition: return sw::Shader::PARAMETER_OUTPUT; 2889 case EvqPointSize: return sw::Shader::PARAMETER_OUTPUT; 2890 case EvqInstanceID: return sw::Shader::PARAMETER_MISCTYPE; 2891 case EvqVertexID: return sw::Shader::PARAMETER_MISCTYPE; 2892 case EvqFragCoord: return sw::Shader::PARAMETER_MISCTYPE; 2893 case EvqFrontFacing: return sw::Shader::PARAMETER_MISCTYPE; 2894 case EvqPointCoord: return sw::Shader::PARAMETER_INPUT; 2895 case EvqFragColor: return sw::Shader::PARAMETER_COLOROUT; 2896 case EvqFragData: return sw::Shader::PARAMETER_COLOROUT; 2897 case EvqFragDepth: return sw::Shader::PARAMETER_DEPTHOUT; 2898 default: UNREACHABLE(qualifier); 2899 } 2900 2901 return sw::Shader::PARAMETER_VOID; 2902 } 2903 hasFlatQualifier(TIntermTyped * operand)2904 bool OutputASM::hasFlatQualifier(TIntermTyped *operand) 2905 { 2906 const TQualifier qualifier = operand->getQualifier(); 2907 return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn; 2908 } 2909 registerIndex(TIntermTyped * operand)2910 unsigned int OutputASM::registerIndex(TIntermTyped *operand) 2911 { 2912 if(isSamplerRegister(operand)) 2913 { 2914 return samplerRegister(operand); 2915 } 2916 else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler 2917 { 2918 samplerRegister(operand); // Make sure the sampler is declared 2919 } 2920 2921 switch(operand->getQualifier()) 2922 { 2923 case EvqTemporary: return temporaryRegister(operand); 2924 case EvqGlobal: return temporaryRegister(operand); 2925 case EvqConstExpr: return temporaryRegister(operand); // Unevaluated constant expression 2926 case EvqAttribute: return attributeRegister(operand); 2927 case EvqVaryingIn: return varyingRegister(operand); 2928 case EvqVaryingOut: return varyingRegister(operand); 2929 case EvqVertexIn: return attributeRegister(operand); 2930 case EvqFragmentOut: return fragmentOutputRegister(operand); 2931 case EvqVertexOut: return 
varyingRegister(operand); 2932 case EvqFragmentIn: return varyingRegister(operand); 2933 case EvqInvariantVaryingIn: return varyingRegister(operand); 2934 case EvqInvariantVaryingOut: return varyingRegister(operand); 2935 case EvqSmooth: return varyingRegister(operand); 2936 case EvqFlat: return varyingRegister(operand); 2937 case EvqCentroidOut: return varyingRegister(operand); 2938 case EvqSmoothIn: return varyingRegister(operand); 2939 case EvqFlatIn: return varyingRegister(operand); 2940 case EvqCentroidIn: return varyingRegister(operand); 2941 case EvqUniform: return uniformRegister(operand); 2942 case EvqIn: return temporaryRegister(operand); 2943 case EvqOut: return temporaryRegister(operand); 2944 case EvqInOut: return temporaryRegister(operand); 2945 case EvqConstReadOnly: return temporaryRegister(operand); 2946 case EvqPosition: return varyingRegister(operand); 2947 case EvqPointSize: return varyingRegister(operand); 2948 case EvqInstanceID: vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex; 2949 case EvqVertexID: vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex; 2950 case EvqFragCoord: pixelShader->declareVPos(); return sw::Shader::VPosIndex; 2951 case EvqFrontFacing: pixelShader->declareVFace(); return sw::Shader::VFaceIndex; 2952 case EvqPointCoord: return varyingRegister(operand); 2953 case EvqFragColor: return 0; 2954 case EvqFragData: return fragmentOutputRegister(operand); 2955 case EvqFragDepth: return 0; 2956 default: UNREACHABLE(operand->getQualifier()); 2957 } 2958 2959 return 0; 2960 } 2961 writeMask(TIntermTyped * destination,int index)2962 int OutputASM::writeMask(TIntermTyped *destination, int index) 2963 { 2964 if(destination->getQualifier() == EvqPointSize) 2965 { 2966 return 0x2; // Point size stored in the y component 2967 } 2968 2969 return 0xF >> (4 - registerSize(destination->getType(), index)); 2970 } 2971 readSwizzle(TIntermTyped * argument,int size)2972 int OutputASM::readSwizzle(TIntermTyped 
*argument, int size) 2973 { 2974 if(argument->getQualifier() == EvqPointSize) 2975 { 2976 return 0x55; // Point size stored in the y component 2977 } 2978 2979 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4}; // (void), xxxx, xyyy, xyzz, xyzw 2980 2981 return swizzleSize[size]; 2982 } 2983 2984 // Conservatively checks whether an expression is fast to compute and has no side effects trivial(TIntermTyped * expression,int budget)2985 bool OutputASM::trivial(TIntermTyped *expression, int budget) 2986 { 2987 if(!expression->isRegister()) 2988 { 2989 return false; 2990 } 2991 2992 return cost(expression, budget) >= 0; 2993 } 2994 2995 // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects) cost(TIntermNode * expression,int budget)2996 int OutputASM::cost(TIntermNode *expression, int budget) 2997 { 2998 if(budget < 0) 2999 { 3000 return budget; 3001 } 3002 3003 if(expression->getAsSymbolNode()) 3004 { 3005 return budget; 3006 } 3007 else if(expression->getAsConstantUnion()) 3008 { 3009 return budget; 3010 } 3011 else if(expression->getAsBinaryNode()) 3012 { 3013 TIntermBinary *binary = expression->getAsBinaryNode(); 3014 3015 switch(binary->getOp()) 3016 { 3017 case EOpVectorSwizzle: 3018 case EOpIndexDirect: 3019 case EOpIndexDirectStruct: 3020 case EOpIndexDirectInterfaceBlock: 3021 return cost(binary->getLeft(), budget - 0); 3022 case EOpAdd: 3023 case EOpSub: 3024 case EOpMul: 3025 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1)); 3026 default: 3027 return -1; 3028 } 3029 } 3030 else if(expression->getAsUnaryNode()) 3031 { 3032 TIntermUnary *unary = expression->getAsUnaryNode(); 3033 3034 switch(unary->getOp()) 3035 { 3036 case EOpAbs: 3037 case EOpNegative: 3038 return cost(unary->getOperand(), budget - 1); 3039 default: 3040 return -1; 3041 } 3042 } 3043 else if(expression->getAsSelectionNode()) 3044 { 3045 TIntermSelection *selection = expression->getAsSelectionNode(); 
3046 3047 if(selection->usesTernaryOperator()) 3048 { 3049 TIntermTyped *condition = selection->getCondition(); 3050 TIntermNode *trueBlock = selection->getTrueBlock(); 3051 TIntermNode *falseBlock = selection->getFalseBlock(); 3052 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 3053 3054 if(constantCondition) 3055 { 3056 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 3057 3058 if(trueCondition) 3059 { 3060 return cost(trueBlock, budget - 0); 3061 } 3062 else 3063 { 3064 return cost(falseBlock, budget - 0); 3065 } 3066 } 3067 else 3068 { 3069 return cost(trueBlock, cost(falseBlock, budget - 2)); 3070 } 3071 } 3072 } 3073 3074 return -1; 3075 } 3076 findFunction(const TString & name)3077 const Function *OutputASM::findFunction(const TString &name) 3078 { 3079 for(unsigned int f = 0; f < functionArray.size(); f++) 3080 { 3081 if(functionArray[f].name == name) 3082 { 3083 return &functionArray[f]; 3084 } 3085 } 3086 3087 return 0; 3088 } 3089 temporaryRegister(TIntermTyped * temporary)3090 int OutputASM::temporaryRegister(TIntermTyped *temporary) 3091 { 3092 int index = allocate(temporaries, temporary); 3093 if(index >= sw::NUM_TEMPORARY_REGISTERS) 3094 { 3095 mContext.error(temporary->getLine(), 3096 "Too many temporary registers required to compile shader", 3097 pixelShader ? 
"pixel shader" : "vertex shader"); 3098 } 3099 return index; 3100 } 3101 setPixelShaderInputs(const TType & type,int var,bool flat)3102 void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat) 3103 { 3104 if(type.isStruct()) 3105 { 3106 const TFieldList &fields = type.getStruct()->fields(); 3107 int fieldVar = var; 3108 for(const auto &field : fields) 3109 { 3110 const TType& fieldType = *(field->type()); 3111 setPixelShaderInputs(fieldType, fieldVar, flat); 3112 fieldVar += fieldType.totalRegisterCount(); 3113 } 3114 } 3115 else 3116 { 3117 for(int i = 0; i < type.totalRegisterCount(); i++) 3118 { 3119 pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat)); 3120 } 3121 } 3122 } 3123 varyingRegister(TIntermTyped * varying)3124 int OutputASM::varyingRegister(TIntermTyped *varying) 3125 { 3126 int var = lookup(varyings, varying); 3127 3128 if(var == -1) 3129 { 3130 var = allocate(varyings, varying); 3131 if (var == -1) 3132 { 3133 return 0; 3134 } 3135 int registerCount = varying->totalRegisterCount(); 3136 3137 if(pixelShader) 3138 { 3139 if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS) 3140 { 3141 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader"); 3142 return 0; 3143 } 3144 3145 if(varying->getQualifier() == EvqPointCoord) 3146 { 3147 ASSERT(varying->isRegister()); 3148 pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var)); 3149 } 3150 else 3151 { 3152 setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying)); 3153 } 3154 } 3155 else if(vertexShader) 3156 { 3157 if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS) 3158 { 3159 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader"); 3160 return 0; 3161 } 3162 3163 if(varying->getQualifier() == EvqPosition) 3164 { 3165 ASSERT(varying->isRegister()); 3166 
vertexShader->setPositionRegister(var); 3167 } 3168 else if(varying->getQualifier() == EvqPointSize) 3169 { 3170 ASSERT(varying->isRegister()); 3171 vertexShader->setPointSizeRegister(var); 3172 } 3173 else 3174 { 3175 // Semantic indexes for user varyings will be assigned during program link to match the pixel shader 3176 } 3177 } 3178 else UNREACHABLE(0); 3179 3180 declareVarying(varying, var); 3181 } 3182 3183 return var; 3184 } 3185 declareVarying(TIntermTyped * varying,int reg)3186 void OutputASM::declareVarying(TIntermTyped *varying, int reg) 3187 { 3188 if(varying->getQualifier() != EvqPointCoord) // gl_PointCoord does not need linking 3189 { 3190 TIntermSymbol *symbol = varying->getAsSymbolNode(); 3191 declareVarying(varying->getType(), symbol->getSymbol(), reg); 3192 } 3193 } 3194 declareVarying(const TType & type,const TString & varyingName,int registerIndex)3195 void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex) 3196 { 3197 const char *name = varyingName.c_str(); 3198 VaryingList &activeVaryings = shaderObject->varyings; 3199 3200 TStructure* structure = type.getStruct(); 3201 if(structure) 3202 { 3203 int fieldRegisterIndex = registerIndex; 3204 3205 const TFieldList &fields = type.getStruct()->fields(); 3206 for(const auto &field : fields) 3207 { 3208 const TType& fieldType = *(field->type()); 3209 declareVarying(fieldType, varyingName + "." 
+ field->name(), fieldRegisterIndex); 3210 if(fieldRegisterIndex >= 0) 3211 { 3212 fieldRegisterIndex += fieldType.totalRegisterCount(); 3213 } 3214 } 3215 } 3216 else 3217 { 3218 // Check if this varying has been declared before without having a register assigned 3219 for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++) 3220 { 3221 if(v->name == name) 3222 { 3223 if(registerIndex >= 0) 3224 { 3225 ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex); 3226 v->registerIndex = registerIndex; 3227 } 3228 3229 return; 3230 } 3231 } 3232 3233 activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0)); 3234 } 3235 } 3236 declareFragmentOutput(TIntermTyped * fragmentOutput)3237 void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput) 3238 { 3239 int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location; 3240 int registerCount = fragmentOutput->totalRegisterCount(); 3241 if(requestedLocation < 0) 3242 { 3243 ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier 3244 return; // No requested location 3245 } 3246 else if((requestedLocation + registerCount) > sw::RENDERTARGETS) 3247 { 3248 mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader"); 3249 } 3250 else 3251 { 3252 int currentIndex = lookup(fragmentOutputs, fragmentOutput); 3253 if(requestedLocation != currentIndex) 3254 { 3255 if(currentIndex != -1) 3256 { 3257 mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader"); 3258 } 3259 else 3260 { 3261 if(fragmentOutputs.size() <= (size_t)requestedLocation) 3262 { 3263 while(fragmentOutputs.size() < (size_t)requestedLocation) 3264 { 3265 fragmentOutputs.push_back(nullptr); 3266 } 3267 for(int i = 0; i < registerCount; i++) 3268 { 3269 fragmentOutputs.push_back(fragmentOutput); 3270 } 3271 } 3272 else 
3273 { 3274 for(int i = 0; i < registerCount; i++) 3275 { 3276 if(!fragmentOutputs[requestedLocation + i]) 3277 { 3278 fragmentOutputs[requestedLocation + i] = fragmentOutput; 3279 } 3280 else 3281 { 3282 mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader"); 3283 return; 3284 } 3285 } 3286 } 3287 } 3288 } 3289 } 3290 } 3291 uniformRegister(TIntermTyped * uniform)3292 int OutputASM::uniformRegister(TIntermTyped *uniform) 3293 { 3294 const TType &type = uniform->getType(); 3295 ASSERT(!IsSampler(type.getBasicType())); 3296 TInterfaceBlock *block = type.getAsInterfaceBlock(); 3297 TIntermSymbol *symbol = uniform->getAsSymbolNode(); 3298 ASSERT(symbol || block); 3299 3300 if(symbol || block) 3301 { 3302 TInterfaceBlock* parentBlock = type.getInterfaceBlock(); 3303 bool isBlockMember = (!block && parentBlock); 3304 int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform); 3305 3306 if(index == -1 || isBlockMember) 3307 { 3308 if(index == -1) 3309 { 3310 index = allocate(uniforms, uniform); 3311 if (index == -1) 3312 { 3313 return 0; 3314 } 3315 } 3316 3317 // Verify if the current uniform is a member of an already declared block 3318 const TString &name = symbol ? 
symbol->getSymbol() : block->name(); 3319 int blockMemberIndex = blockMemberLookup(type, name, index); 3320 if(blockMemberIndex == -1) 3321 { 3322 declareUniform(type, name, index, false); 3323 } 3324 else 3325 { 3326 index = blockMemberIndex; 3327 } 3328 } 3329 3330 return index; 3331 } 3332 3333 return 0; 3334 } 3335 attributeRegister(TIntermTyped * attribute)3336 int OutputASM::attributeRegister(TIntermTyped *attribute) 3337 { 3338 ASSERT(!attribute->isArray()); 3339 3340 int index = lookup(attributes, attribute); 3341 3342 if(index == -1) 3343 { 3344 TIntermSymbol *symbol = attribute->getAsSymbolNode(); 3345 ASSERT(symbol); 3346 3347 if(symbol) 3348 { 3349 index = allocate(attributes, attribute); 3350 if (index == -1) 3351 { 3352 return -1; 3353 } 3354 const TType &type = attribute->getType(); 3355 int registerCount = attribute->totalRegisterCount(); 3356 sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT; 3357 switch(type.getBasicType()) 3358 { 3359 case EbtInt: 3360 attribType = sw::VertexShader::ATTRIBTYPE_INT; 3361 break; 3362 case EbtUInt: 3363 attribType = sw::VertexShader::ATTRIBTYPE_UINT; 3364 break; 3365 case EbtFloat: 3366 default: 3367 break; 3368 } 3369 3370 if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS) 3371 { 3372 for(int i = 0; i < registerCount; i++) 3373 { 3374 vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType); 3375 } 3376 } 3377 3378 ActiveAttributes &activeAttributes = shaderObject->activeAttributes; 3379 3380 const char *name = symbol->getSymbol().c_str(); 3381 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index)); 3382 } 3383 } 3384 3385 return index; 3386 } 3387 fragmentOutputRegister(TIntermTyped * fragmentOutput)3388 int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput) 3389 { 3390 return allocate(fragmentOutputs, fragmentOutput); 3391 } 
3392 samplerRegister(TIntermTyped * sampler)3393 int OutputASM::samplerRegister(TIntermTyped *sampler) 3394 { 3395 const TType &type = sampler->getType(); 3396 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3397 3398 TIntermSymbol *symbol = sampler->getAsSymbolNode(); 3399 TIntermBinary *binary = sampler->getAsBinaryNode(); 3400 3401 if(symbol) 3402 { 3403 switch(type.getQualifier()) 3404 { 3405 case EvqUniform: 3406 return samplerRegister(symbol); 3407 case EvqIn: 3408 case EvqConstReadOnly: 3409 // Function arguments are not (uniform) sampler registers 3410 return -1; 3411 default: 3412 UNREACHABLE(type.getQualifier()); 3413 } 3414 } 3415 else if(binary) 3416 { 3417 TIntermTyped *left = binary->getLeft(); 3418 TIntermTyped *right = binary->getRight(); 3419 const TType &leftType = left->getType(); 3420 int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0; 3421 int offset = 0; 3422 3423 switch(binary->getOp()) 3424 { 3425 case EOpIndexDirect: 3426 ASSERT(left->isArray()); 3427 offset = index * leftType.samplerRegisterCount(); 3428 break; 3429 case EOpIndexDirectStruct: 3430 ASSERT(leftType.isStruct()); 3431 { 3432 const TFieldList &fields = leftType.getStruct()->fields(); 3433 3434 for(int i = 0; i < index; i++) 3435 { 3436 offset += fields[i]->type()->totalSamplerRegisterCount(); 3437 } 3438 } 3439 break; 3440 case EOpIndexIndirect: // Indirect indexing produces a temporary, not a sampler register 3441 return -1; 3442 case EOpIndexDirectInterfaceBlock: // Interface blocks can't contain samplers 3443 default: 3444 UNREACHABLE(binary->getOp()); 3445 return -1; 3446 } 3447 3448 int base = samplerRegister(left); 3449 3450 if(base < 0) 3451 { 3452 return -1; 3453 } 3454 3455 return base + offset; 3456 } 3457 3458 UNREACHABLE(0); 3459 return -1; // Not a (uniform) sampler register 3460 } 3461 samplerRegister(TIntermSymbol * sampler)3462 int OutputASM::samplerRegister(TIntermSymbol 
*sampler) 3463 { 3464 const TType &type = sampler->getType(); 3465 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3466 3467 int index = lookup(samplers, sampler); 3468 3469 if(index == -1) 3470 { 3471 index = allocate(samplers, sampler, true); 3472 if (index == -1) 3473 { 3474 return 0; 3475 } 3476 3477 if(sampler->getQualifier() == EvqUniform) 3478 { 3479 const char *name = sampler->getSymbol().c_str(); 3480 declareUniform(type, name, index, true); 3481 } 3482 } 3483 3484 return index; 3485 } 3486 isSamplerRegister(TIntermTyped * operand)3487 bool OutputASM::isSamplerRegister(TIntermTyped *operand) 3488 { 3489 return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0; 3490 } 3491 arrayExceedsLimits(TIntermTyped * operand)3492 bool OutputASM::arrayExceedsLimits(TIntermTyped *operand) 3493 { 3494 const TVariable *maxUniformVectors = nullptr; 3495 TString builtinName = ""; 3496 if (vertexShader) 3497 { 3498 builtinName = "gl_MaxVertexUniformVectors"; 3499 } 3500 else if (pixelShader) 3501 { 3502 builtinName = "gl_MaxFragmentUniformVectors"; 3503 } 3504 maxUniformVectors = static_cast<const TVariable *>(mContext.symbolTable.findBuiltIn(builtinName.c_str(), mContext.getShaderVersion())); 3505 if (operand->getArraySize() > maxUniformVectors->getConstPointer()->getIConst()) 3506 { 3507 std::stringstream extraInfoStream; 3508 extraInfoStream << "Array size (" << operand->getArraySize() << ") " 3509 << "exceeds limit of " << builtinName 3510 << " (" << maxUniformVectors->getConstPointer()->getIConst() << ")"; 3511 std::string errorStr = extraInfoStream.str(); 3512 mContext.error(operand->getLine(), errorStr.c_str(), 3513 operand->getBasicString()); 3514 return true; 3515 } 3516 return false; 3517 } 3518 lookup(VariableArray & list,TIntermTyped * variable)3519 int OutputASM::lookup(VariableArray &list, TIntermTyped *variable) 3520 { 3521 for(unsigned int i = 0; i < list.size(); i++) 3522 { 3523 
if(list[i] == variable) 3524 { 3525 return i; // Pointer match 3526 } 3527 } 3528 3529 TIntermSymbol *varSymbol = variable->getAsSymbolNode(); 3530 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock(); 3531 3532 if(varBlock) 3533 { 3534 for(unsigned int i = 0; i < list.size(); i++) 3535 { 3536 if(list[i]) 3537 { 3538 TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock(); 3539 3540 if(listBlock) 3541 { 3542 if(listBlock->name() == varBlock->name()) 3543 { 3544 ASSERT(listBlock->arraySize() == varBlock->arraySize()); 3545 ASSERT(listBlock->fields() == varBlock->fields()); 3546 ASSERT(listBlock->blockStorage() == varBlock->blockStorage()); 3547 ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking()); 3548 3549 return i; 3550 } 3551 } 3552 } 3553 } 3554 } 3555 else if(varSymbol) 3556 { 3557 for(unsigned int i = 0; i < list.size(); i++) 3558 { 3559 if(list[i]) 3560 { 3561 TIntermSymbol *listSymbol = list[i]->getAsSymbolNode(); 3562 3563 if(listSymbol) 3564 { 3565 if(listSymbol->getId() == varSymbol->getId()) 3566 { 3567 ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol()); 3568 ASSERT(listSymbol->getType() == varSymbol->getType()); 3569 ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier()); 3570 3571 return i; 3572 } 3573 } 3574 } 3575 } 3576 } 3577 3578 return -1; 3579 } 3580 lookup(VariableArray & list,TInterfaceBlock * block)3581 int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block) 3582 { 3583 for(unsigned int i = 0; i < list.size(); i++) 3584 { 3585 if(list[i] && (list[i]->getType().getInterfaceBlock() == block)) 3586 { 3587 return i; // Pointer match 3588 } 3589 } 3590 return -1; 3591 } 3592 allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3593 int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly) 3594 { 3595 int index = lookup(list, variable); 3596 3597 if(index == -1) 3598 { 3599 if (arrayExceedsLimits(variable)) 3600 { 3601 return -1; 
3602 } 3603 unsigned int registerCount = variable->blockRegisterCount(samplersOnly); 3604 3605 for(unsigned int i = 0; i < list.size(); i++) 3606 { 3607 if(list[i] == 0) 3608 { 3609 unsigned int j = 1; 3610 for( ; j < registerCount && (i + j) < list.size(); j++) 3611 { 3612 if(list[i + j] != 0) 3613 { 3614 break; 3615 } 3616 } 3617 3618 if(j == registerCount) // Found free slots 3619 { 3620 for(unsigned int j = 0; j < registerCount; j++) 3621 { 3622 list[i + j] = variable; 3623 } 3624 3625 return i; 3626 } 3627 } 3628 } 3629 3630 index = list.size(); 3631 3632 for(unsigned int i = 0; i < registerCount; i++) 3633 { 3634 list.push_back(variable); 3635 } 3636 } 3637 3638 return index; 3639 } 3640 free(VariableArray & list,TIntermTyped * variable)3641 void OutputASM::free(VariableArray &list, TIntermTyped *variable) 3642 { 3643 int index = lookup(list, variable); 3644 3645 if(index >= 0) 3646 { 3647 list[index] = 0; 3648 } 3649 } 3650 blockMemberLookup(const TType & type,const TString & name,int registerIndex)3651 int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex) 3652 { 3653 const TInterfaceBlock *block = type.getInterfaceBlock(); 3654 3655 if(block) 3656 { 3657 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3658 const TFieldList& fields = block->fields(); 3659 const TString &blockName = block->name(); 3660 int fieldRegisterIndex = registerIndex; 3661 3662 if(!type.isInterfaceBlock()) 3663 { 3664 // This is a uniform that's part of a block, let's see if the block is already defined 3665 for(size_t i = 0; i < activeUniformBlocks.size(); ++i) 3666 { 3667 if(activeUniformBlocks[i].name == blockName.c_str()) 3668 { 3669 // The block is already defined, find the register for the current uniform and return it 3670 for(size_t j = 0; j < fields.size(); j++) 3671 { 3672 const TString &fieldName = fields[j]->name(); 3673 if(fieldName == name) 3674 { 3675 return fieldRegisterIndex; 3676 } 3677 3678 
fieldRegisterIndex += fields[j]->type()->totalRegisterCount(); 3679 } 3680 3681 ASSERT(false); 3682 return fieldRegisterIndex; 3683 } 3684 } 3685 } 3686 } 3687 3688 return -1; 3689 } 3690 declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3691 void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder) 3692 { 3693 const TStructure *structure = type.getStruct(); 3694 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr; 3695 3696 if(!structure && !block) 3697 { 3698 ActiveUniforms &activeUniforms = shaderObject->activeUniforms; 3699 const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo(); 3700 if(blockId >= 0) 3701 { 3702 blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type))); 3703 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size()); 3704 } 3705 int fieldRegisterIndex = encoder ? 
shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex; 3706 bool isSampler = IsSampler(type.getBasicType()); 3707 if(isSampler && samplersOnly) 3708 { 3709 for(int i = 0; i < type.totalRegisterCount(); i++) 3710 { 3711 shader->declareSampler(fieldRegisterIndex + i); 3712 } 3713 } 3714 if(isSampler == samplersOnly) 3715 { 3716 activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo)); 3717 } 3718 } 3719 else if(block) 3720 { 3721 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3722 const TFieldList& fields = block->fields(); 3723 const TString &blockName = block->name(); 3724 int fieldRegisterIndex = registerIndex; 3725 bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1); 3726 3727 blockId = activeUniformBlocks.size(); 3728 bool isRowMajor = block->matrixPacking() == EmpRowMajor; 3729 activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(), 3730 block->blockStorage(), isRowMajor, registerIndex, blockId)); 3731 blockDefinitions.push_back(BlockDefinitionIndexMap()); 3732 3733 Std140BlockEncoder currentBlockEncoder; 3734 currentBlockEncoder.enterAggregateType(); 3735 for(const auto &field : fields) 3736 { 3737 const TType &fieldType = *(field->type()); 3738 const TString &fieldName = field->name(); 3739 if(isUniformBlockMember && (fieldName == name)) 3740 { 3741 registerIndex = fieldRegisterIndex; 3742 } 3743 3744 const TString uniformName = block->hasInstanceName() ? blockName + "." 
+ fieldName : fieldName; 3745 3746 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, ¤tBlockEncoder); 3747 fieldRegisterIndex += fieldType.totalRegisterCount(); 3748 } 3749 currentBlockEncoder.exitAggregateType(); 3750 activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize(); 3751 } 3752 else 3753 { 3754 // Store struct for program link time validation 3755 shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo())); 3756 3757 int fieldRegisterIndex = registerIndex; 3758 3759 const TFieldList& fields = structure->fields(); 3760 if(type.isArray() && (structure || type.isInterfaceBlock())) 3761 { 3762 for(int i = 0; i < type.getArraySize(); i++) 3763 { 3764 if(encoder) 3765 { 3766 encoder->enterAggregateType(); 3767 } 3768 for(const auto &field : fields) 3769 { 3770 const TType &fieldType = *(field->type()); 3771 const TString &fieldName = field->name(); 3772 const TString uniformName = name + "[" + str(i) + "]." + fieldName; 3773 3774 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder); 3775 fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount(); 3776 } 3777 if(encoder) 3778 { 3779 encoder->exitAggregateType(); 3780 } 3781 } 3782 } 3783 else 3784 { 3785 if(encoder) 3786 { 3787 encoder->enterAggregateType(); 3788 } 3789 for(const auto &field : fields) 3790 { 3791 const TType &fieldType = *(field->type()); 3792 const TString &fieldName = field->name(); 3793 const TString uniformName = name + "." + fieldName; 3794 3795 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder); 3796 fieldRegisterIndex += samplersOnly ? 
fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount(); 3797 } 3798 if(encoder) 3799 { 3800 encoder->exitAggregateType(); 3801 } 3802 } 3803 } 3804 } 3805 dim(TIntermNode * v)3806 int OutputASM::dim(TIntermNode *v) 3807 { 3808 TIntermTyped *vector = v->getAsTyped(); 3809 ASSERT(vector && vector->isRegister()); 3810 return vector->getNominalSize(); 3811 } 3812 dim2(TIntermNode * m)3813 int OutputASM::dim2(TIntermNode *m) 3814 { 3815 TIntermTyped *matrix = m->getAsTyped(); 3816 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray()); 3817 return matrix->getSecondarySize(); 3818 } 3819 3820 // Sets iterations to ~0u if no loop count could be statically determined. LoopInfo(TIntermLoop * node)3821 OutputASM::LoopInfo::LoopInfo(TIntermLoop *node) 3822 { 3823 // Parse loops of the form: 3824 // for(int index = initial; index [comparator] limit; index [op] increment) 3825 3826 // Parse index name and intial value 3827 if(node->getInit()) 3828 { 3829 TIntermAggregate *init = node->getInit()->getAsAggregate(); 3830 3831 if(init) 3832 { 3833 TIntermSequence &sequence = init->getSequence(); 3834 TIntermTyped *variable = sequence[0]->getAsTyped(); 3835 3836 if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt) 3837 { 3838 TIntermBinary *assign = variable->getAsBinaryNode(); 3839 3840 if(assign && assign->getOp() == EOpInitialize) 3841 { 3842 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode(); 3843 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion(); 3844 3845 if(symbol && constant) 3846 { 3847 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3848 { 3849 index = symbol; 3850 initial = constant->getUnionArrayPointer()[0].getIConst(); 3851 } 3852 } 3853 } 3854 } 3855 } 3856 } 3857 3858 // Parse comparator and limit value 3859 if(index && node->getCondition()) 3860 { 3861 TIntermBinary *test = node->getCondition()->getAsBinaryNode(); 3862 TIntermSymbol *left = test 
? test->getLeft()->getAsSymbolNode() : nullptr; 3863 3864 if(left && (left->getId() == index->getId())) 3865 { 3866 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion(); 3867 3868 if(constant) 3869 { 3870 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3871 { 3872 comparator = test->getOp(); 3873 limit = constant->getUnionArrayPointer()[0].getIConst(); 3874 } 3875 } 3876 } 3877 } 3878 3879 // Parse increment 3880 if(index && comparator != EOpNull && node->getExpression()) 3881 { 3882 TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode(); 3883 TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode(); 3884 3885 if(binaryTerminal) 3886 { 3887 TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode(); 3888 3889 if(operand && operand->getId() == index->getId()) 3890 { 3891 TOperator op = binaryTerminal->getOp(); 3892 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion(); 3893 3894 if(constant) 3895 { 3896 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3897 { 3898 int value = constant->getUnionArrayPointer()[0].getIConst(); 3899 3900 switch(op) 3901 { 3902 case EOpAddAssign: increment = value; break; 3903 case EOpSubAssign: increment = -value; break; 3904 default: increment = 0; break; // Rare cases left unhandled. Treated as non-deterministic. 3905 } 3906 } 3907 } 3908 } 3909 } 3910 else if(unaryTerminal) 3911 { 3912 TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode(); 3913 3914 if(operand && operand->getId() == index->getId()) 3915 { 3916 TOperator op = unaryTerminal->getOp(); 3917 3918 switch(op) 3919 { 3920 case EOpPostIncrement: increment = 1; break; 3921 case EOpPostDecrement: increment = -1; break; 3922 case EOpPreIncrement: increment = 1; break; 3923 case EOpPreDecrement: increment = -1; break; 3924 default: increment = 0; break; // Rare cases left unhandled. Treated as non-deterministic. 
3925 } 3926 } 3927 } 3928 } 3929 3930 if(index && comparator != EOpNull && increment != 0) 3931 { 3932 // Check the loop body for return statements or changes to the index variable that make it non-deterministic. 3933 LoopUnrollable loopUnrollable; 3934 bool unrollable = loopUnrollable.traverse(node, index->getId()); 3935 3936 if(!unrollable) 3937 { 3938 iterations = ~0u; 3939 return; 3940 } 3941 3942 if(comparator == EOpLessThanEqual) 3943 { 3944 comparator = EOpLessThan; 3945 limit += 1; 3946 } 3947 else if(comparator == EOpGreaterThanEqual) 3948 { 3949 comparator = EOpLessThan; 3950 limit -= 1; 3951 std::swap(initial, limit); 3952 increment = -increment; 3953 } 3954 else if(comparator == EOpGreaterThan) 3955 { 3956 comparator = EOpLessThan; 3957 std::swap(initial, limit); 3958 increment = -increment; 3959 } 3960 3961 if(comparator == EOpLessThan) 3962 { 3963 if(!(initial < limit)) // Never loops 3964 { 3965 iterations = 0; 3966 } 3967 else if(increment < 0) 3968 { 3969 iterations = ~0u; 3970 } 3971 else 3972 { 3973 iterations = (limit - initial + abs(increment) - 1) / increment; // Ceiling division 3974 } 3975 } 3976 else 3977 { 3978 // Rare cases left unhandled. Treated as non-deterministic. 3979 iterations = ~0u; 3980 } 3981 } 3982 } 3983 traverse(TIntermLoop * loop,int indexId)3984 bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId) 3985 { 3986 loopUnrollable = true; 3987 3988 loopIndexId = indexId; 3989 TIntermNode *body = loop->getBody(); 3990 3991 if(body) 3992 { 3993 body->traverse(this); 3994 } 3995 3996 return loopUnrollable; 3997 } 3998 visitSymbol(TIntermSymbol * node)3999 void LoopUnrollable::visitSymbol(TIntermSymbol *node) 4000 { 4001 // Check that the loop index is not used as the argument to a function out or inout parameter. 
4002 if(node->getId() == loopIndexId) 4003 { 4004 if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut) 4005 { 4006 loopUnrollable = false; 4007 } 4008 } 4009 } 4010 visitBinary(Visit visit,TIntermBinary * node)4011 bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node) 4012 { 4013 if(!loopUnrollable) 4014 { 4015 return false; 4016 } 4017 4018 // Check that the loop index is not statically assigned to. 4019 TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode(); 4020 loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId)); 4021 4022 return loopUnrollable; 4023 } 4024 visitUnary(Visit visit,TIntermUnary * node)4025 bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node) 4026 { 4027 if(!loopUnrollable) 4028 { 4029 return false; 4030 } 4031 4032 // Check that the loop index is not statically assigned to. 4033 TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode(); 4034 loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId)); 4035 4036 return loopUnrollable; 4037 } 4038 visitBranch(Visit visit,TIntermBranch * node)4039 bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node) 4040 { 4041 if(!loopUnrollable) 4042 { 4043 return false; 4044 } 4045 4046 switch(node->getFlowOp()) 4047 { 4048 case EOpKill: 4049 case EOpReturn: 4050 case EOpBreak: 4051 case EOpContinue: 4052 loopUnrollable = false; 4053 break; 4054 default: UNREACHABLE(node->getFlowOp()); 4055 } 4056 4057 return loopUnrollable; 4058 } 4059 visitAggregate(Visit visit,TIntermAggregate * node)4060 bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node) 4061 { 4062 return loopUnrollable; 4063 } 4064 } 4065