1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "OutputASM.h" 16 #include "Common/Math.hpp" 17 18 #include "common/debug.h" 19 #include "InfoSink.h" 20 21 #include "libGLESv2/Shader.h" 22 23 #include <GLES2/gl2.h> 24 #include <GLES2/gl2ext.h> 25 #include <GLES3/gl3.h> 26 #include <GL/glcorearb.h> 27 #include <GL/glext.h> 28 29 #include <stdlib.h> 30 31 namespace 32 { glVariableType(const TType & type)33 GLenum glVariableType(const TType &type) 34 { 35 switch(type.getBasicType()) 36 { 37 case EbtFloat: 38 if(type.isScalar()) 39 { 40 return GL_FLOAT; 41 } 42 else if(type.isVector()) 43 { 44 switch(type.getNominalSize()) 45 { 46 case 2: return GL_FLOAT_VEC2; 47 case 3: return GL_FLOAT_VEC3; 48 case 4: return GL_FLOAT_VEC4; 49 default: UNREACHABLE(type.getNominalSize()); 50 } 51 } 52 else if(type.isMatrix()) 53 { 54 switch(type.getNominalSize()) 55 { 56 case 2: 57 switch(type.getSecondarySize()) 58 { 59 case 2: return GL_FLOAT_MAT2; 60 case 3: return GL_FLOAT_MAT2x3; 61 case 4: return GL_FLOAT_MAT2x4; 62 default: UNREACHABLE(type.getSecondarySize()); 63 } 64 case 3: 65 switch(type.getSecondarySize()) 66 { 67 case 2: return GL_FLOAT_MAT3x2; 68 case 3: return GL_FLOAT_MAT3; 69 case 4: return GL_FLOAT_MAT3x4; 70 default: UNREACHABLE(type.getSecondarySize()); 71 } 72 case 4: 73 switch(type.getSecondarySize()) 74 { 75 case 2: return GL_FLOAT_MAT4x2; 76 case 3: return GL_FLOAT_MAT4x3; 77 case 4: return GL_FLOAT_MAT4; 78 default: UNREACHABLE(type.getSecondarySize()); 79 } 80 default: UNREACHABLE(type.getNominalSize()); 81 } 82 } 83 else UNREACHABLE(0); 84 break; 85 case EbtInt: 86 if(type.isScalar()) 87 { 88 return GL_INT; 89 } 90 else if(type.isVector()) 91 { 92 switch(type.getNominalSize()) 93 { 94 case 2: return GL_INT_VEC2; 95 case 3: return GL_INT_VEC3; 96 case 4: return GL_INT_VEC4; 97 default: UNREACHABLE(type.getNominalSize()); 98 } 99 } 100 else UNREACHABLE(0); 101 break; 102 case EbtUInt: 103 if(type.isScalar()) 104 { 105 return GL_UNSIGNED_INT; 106 } 107 else if(type.isVector()) 108 { 109 switch(type.getNominalSize()) 110 { 111 case 2: return GL_UNSIGNED_INT_VEC2; 112 case 3: return GL_UNSIGNED_INT_VEC3; 113 case 4: return GL_UNSIGNED_INT_VEC4; 114 default: UNREACHABLE(type.getNominalSize()); 115 } 116 } 117 else UNREACHABLE(0); 118 break; 119 case EbtBool: 120 if(type.isScalar()) 121 { 122 return GL_BOOL; 123 } 124 else if(type.isVector()) 125 { 126 switch(type.getNominalSize()) 127 { 128 case 2: return GL_BOOL_VEC2; 129 case 3: return GL_BOOL_VEC3; 130 case 4: return GL_BOOL_VEC4; 131 default: UNREACHABLE(type.getNominalSize()); 132 } 133 } 134 else UNREACHABLE(0); 135 break; 136 case EbtSampler2D: 137 return GL_SAMPLER_2D; 138 case EbtISampler2D: 139 return GL_INT_SAMPLER_2D; 140 case EbtUSampler2D: 141 return GL_UNSIGNED_INT_SAMPLER_2D; 142 case EbtSamplerCube: 143 return GL_SAMPLER_CUBE; 144 case EbtSampler2DRect: 145 return GL_SAMPLER_2D_RECT_ARB; 146 case EbtISamplerCube: 147 return GL_INT_SAMPLER_CUBE; 148 case EbtUSamplerCube: 149 return GL_UNSIGNED_INT_SAMPLER_CUBE; 150 case EbtSamplerExternalOES: 151 return GL_SAMPLER_EXTERNAL_OES; 152 case EbtSampler3D: 153 return GL_SAMPLER_3D_OES; 154 case EbtISampler3D: 155 return GL_INT_SAMPLER_3D; 156 case EbtUSampler3D: 157 return GL_UNSIGNED_INT_SAMPLER_3D; 158 case EbtSampler2DArray: 159 return GL_SAMPLER_2D_ARRAY; 160 case EbtISampler2DArray: 161 return GL_INT_SAMPLER_2D_ARRAY; 162 case EbtUSampler2DArray: 163 return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY; 164 case EbtSampler2DShadow: 165 return GL_SAMPLER_2D_SHADOW; 166 case EbtSamplerCubeShadow: 167 return GL_SAMPLER_CUBE_SHADOW; 168 case EbtSampler2DArrayShadow: 169 return GL_SAMPLER_2D_ARRAY_SHADOW; 170 default: 171 UNREACHABLE(type.getBasicType()); 172 break; 173 } 174 175 return GL_NONE; 176 } 177 glVariablePrecision(const TType & type)178 GLenum glVariablePrecision(const TType &type) 179 { 180 if(type.getBasicType() == EbtFloat) 181 { 182 switch(type.getPrecision()) 183 { 184 case EbpHigh: return GL_HIGH_FLOAT; 185 case EbpMedium: return GL_MEDIUM_FLOAT; 186 case EbpLow: return GL_LOW_FLOAT; 187 case EbpUndefined: 188 // Should be defined as the default precision by the parser 189 default: UNREACHABLE(type.getPrecision()); 190 } 191 } 192 else if(type.getBasicType() == EbtInt) 193 { 194 switch(type.getPrecision()) 195 { 196 case EbpHigh: return GL_HIGH_INT; 197 case EbpMedium: return GL_MEDIUM_INT; 198 case EbpLow: return GL_LOW_INT; 199 case EbpUndefined: 200 // Should be defined as the default precision by the parser 201 default: UNREACHABLE(type.getPrecision()); 202 } 203 } 204 205 // Other types (boolean, sampler) don't have a precision 206 return GL_NONE; 207 } 208 } 209 210 namespace glsl 211 { 212 // Integer to TString conversion str(int i)213 TString str(int i) 214 { 215 char buffer[20]; 216 sprintf(buffer, "%d", i); 217 return buffer; 218 } 219 220 class Temporary : public TIntermSymbol 221 { 222 public: Temporary(OutputASM * assembler)223 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler) 224 { 225 } 226 ~Temporary()227 ~Temporary() 228 { 229 assembler->freeTemporary(this); 230 } 231 232 private: 233 OutputASM *const assembler; 234 }; 235 236 class Constant : public TIntermConstantUnion 237 { 238 public: Constant(float x,float y,float z,float w)239 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false)) 240 { 241 constants[0].setFConst(x); 242 constants[1].setFConst(y); 243 constants[2].setFConst(z); 244 constants[3].setFConst(w); 245 } 246 Constant(bool b)247 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false)) 248 { 249 constants[0].setBConst(b); 250 } 251 Constant(int i)252 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false)) 253 { 254 constants[0].setIConst(i); 255 } 256 ~Constant()257 ~Constant() 258 { 259 } 260 261 private: 262 ConstantUnion constants[4]; 263 }; 264 ShaderVariable(const TType & type,const std::string & name,int registerIndex)265 ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) : 266 type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)), 267 name(name), arraySize(type.getArraySize()), registerIndex(registerIndex) 268 { 269 if(type.isStruct()) 270 { 271 for(const auto& field : type.getStruct()->fields()) 272 { 273 fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1)); 274 } 275 } 276 } 277 Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)278 Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) : 279 ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo) 280 { 281 } 282 UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)283 UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize, 284 TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) : 285 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout), 286 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId) 287 { 288 } 289 BlockLayoutEncoder()290 BlockLayoutEncoder::BlockLayoutEncoder() 291 : mCurrentOffset(0) 292 { 293 } 294 encodeType(const TType & type)295 BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type) 296 { 297 int arrayStride; 298 int matrixStride; 299 300 bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor; 301 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride); 302 303 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent), 304 static_cast<int>(arrayStride * BytesPerComponent), 305 static_cast<int>(matrixStride * BytesPerComponent), 306 (matrixStride > 0) && isRowMajor); 307 308 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride); 309 310 return memberInfo; 311 } 312 313 // static getBlockRegister(const BlockMemberInfo & info)314 size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info) 315 { 316 return (info.offset / BytesPerComponent) / ComponentsPerRegister; 317 } 318 319 // static getBlockRegisterElement(const BlockMemberInfo & info)320 size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info) 321 { 322 return (info.offset / BytesPerComponent) % ComponentsPerRegister; 323 } 324 nextRegister()325 void BlockLayoutEncoder::nextRegister() 326 { 327 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister); 328 } 329 Std140BlockEncoder()330 Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder() 331 { 332 } 333 enterAggregateType()334 void Std140BlockEncoder::enterAggregateType() 335 { 336 nextRegister(); 337 } 338 exitAggregateType()339 void Std140BlockEncoder::exitAggregateType() 340 { 341 nextRegister(); 342 } 343 getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)344 void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut) 345 { 346 size_t baseAlignment = 0; 347 int matrixStride = 0; 348 int arrayStride = 0; 349 350 if(type.isMatrix()) 351 { 352 baseAlignment = ComponentsPerRegister; 353 matrixStride = ComponentsPerRegister; 354 355 if(arraySize > 0) 356 { 357 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 358 arrayStride = ComponentsPerRegister * numRegisters; 359 } 360 } 361 else if(arraySize > 0) 362 { 363 baseAlignment = ComponentsPerRegister; 364 arrayStride = ComponentsPerRegister; 365 } 366 else 367 { 368 const size_t numComponents = type.getElementSize(); 369 baseAlignment = (numComponents == 3 ? 4u : numComponents); 370 } 371 372 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment); 373 374 *matrixStrideOut = matrixStride; 375 *arrayStrideOut = arrayStride; 376 } 377 advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)378 void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride) 379 { 380 if(arraySize > 0) 381 { 382 mCurrentOffset += arrayStride * arraySize; 383 } 384 else if(type.isMatrix()) 385 { 386 ASSERT(matrixStride == ComponentsPerRegister); 387 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 388 mCurrentOffset += ComponentsPerRegister * numRegisters; 389 } 390 else 391 { 392 mCurrentOffset += type.getElementSize(); 393 } 394 } 395 Attribute()396 Attribute::Attribute() 397 { 398 type = GL_NONE; 399 arraySize = 0; 400 registerIndex = 0; 401 } 402 Attribute(GLenum type,const std::string & name,int arraySize,int layoutLocation,int registerIndex)403 Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex) 404 { 405 this->type = type; 406 this->name = name; 407 this->arraySize = arraySize; 408 this->layoutLocation = layoutLocation; 409 this->registerIndex = registerIndex; 410 } 411 getPixelShader() const412 sw::PixelShader *Shader::getPixelShader() const 413 { 414 return nullptr; 415 } 416 getVertexShader() const417 sw::VertexShader *Shader::getVertexShader() const 418 { 419 return nullptr; 420 } 421 TextureFunction(const TString & nodeName)422 OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false) 423 { 424 TString name = TFunction::unmangleName(nodeName); 425 426 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect") 427 { 428 method = IMPLICIT; 429 } 430 else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj") 431 { 432 method = IMPLICIT; 433 proj = true; 434 } 435 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod") 436 { 437 method = LOD; 438 } 439 else if(name == "texture2DProjLod" || name == "textureProjLod") 440 { 441 method = LOD; 442 proj = true; 443 } 444 else if(name == "textureSize") 445 { 446 method = SIZE; 447 } 448 else if(name == "textureOffset") 449 { 450 method = IMPLICIT; 451 offset = true; 452 } 453 else if(name == "textureProjOffset") 454 { 455 method = IMPLICIT; 456 offset = true; 457 proj = true; 458 } 459 else if(name == "textureLodOffset") 460 { 461 method = LOD; 462 offset = true; 463 } 464 else if(name == "textureProjLodOffset") 465 { 466 method = LOD; 467 proj = true; 468 offset = true; 469 } 470 else if(name == "texelFetch") 471 { 472 method = FETCH; 473 } 474 else if(name == "texelFetchOffset") 475 { 476 method = FETCH; 477 offset = true; 478 } 479 else if(name == "textureGrad") 480 { 481 method = GRAD; 482 } 483 else if(name == "textureGradOffset") 484 { 485 method = GRAD; 486 offset = true; 487 } 488 else if(name == "textureProjGrad") 489 { 490 method = GRAD; 491 proj = true; 492 } 493 else if(name == "textureProjGradOffset") 494 { 495 method = GRAD; 496 proj = true; 497 offset = true; 498 } 499 else UNREACHABLE(0); 500 } 501 OutputASM(TParseContext & context,Shader * shaderObject)502 OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context) 503 { 504 shader = nullptr; 505 pixelShader = nullptr; 506 vertexShader = nullptr; 507 508 if(shaderObject) 509 { 510 shader = shaderObject->getShader(); 511 pixelShader = shaderObject->getPixelShader(); 512 vertexShader = shaderObject->getVertexShader(); 513 } 514 515 functionArray.push_back(Function(0, "main(", nullptr, nullptr)); 516 currentFunction = 0; 517 outputQualifier = EvqOutput; // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData 518 } 519 ~OutputASM()520 OutputASM::~OutputASM() 521 { 522 } 523 output()524 void OutputASM::output() 525 { 526 if(shader) 527 { 528 emitShader(GLOBAL); 529 530 if(functionArray.size() > 1) // Only call main() when there are other functions 531 { 532 Instruction *callMain = emit(sw::Shader::OPCODE_CALL); 533 callMain->dst.type = sw::Shader::PARAMETER_LABEL; 534 callMain->dst.index = 0; // main() 535 536 emit(sw::Shader::OPCODE_RET); 537 } 538 539 emitShader(FUNCTION); 540 } 541 } 542 emitShader(Scope scope)543 void OutputASM::emitShader(Scope scope) 544 { 545 emitScope = scope; 546 currentScope = GLOBAL; 547 mContext.getTreeRoot()->traverse(this); 548 } 549 freeTemporary(Temporary * temporary)550 void OutputASM::freeTemporary(Temporary *temporary) 551 { 552 free(temporaries, temporary); 553 } 554 getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const555 sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const 556 { 557 TBasicType baseType = in->getType().getBasicType(); 558 559 switch(op) 560 { 561 case sw::Shader::OPCODE_NEG: 562 switch(baseType) 563 { 564 case EbtInt: 565 case EbtUInt: 566 return sw::Shader::OPCODE_INEG; 567 case EbtFloat: 568 default: 569 return op; 570 } 571 case sw::Shader::OPCODE_ABS: 572 switch(baseType) 573 { 574 case EbtInt: 575 return sw::Shader::OPCODE_IABS; 576 case EbtFloat: 577 default: 578 return op; 579 } 580 case sw::Shader::OPCODE_SGN: 581 switch(baseType) 582 { 583 case EbtInt: 584 return sw::Shader::OPCODE_ISGN; 585 case EbtFloat: 586 default: 587 return op; 588 } 589 case sw::Shader::OPCODE_ADD: 590 switch(baseType) 591 { 592 case EbtInt: 593 case EbtUInt: 594 return sw::Shader::OPCODE_IADD; 595 case EbtFloat: 596 default: 597 return op; 598 } 599 case sw::Shader::OPCODE_SUB: 600 switch(baseType) 601 { 602 case EbtInt: 603 case EbtUInt: 604 return sw::Shader::OPCODE_ISUB; 605 case EbtFloat: 606 default: 607 return op; 608 } 609 case sw::Shader::OPCODE_MUL: 610 switch(baseType) 611 { 612 case EbtInt: 613 case EbtUInt: 614 return sw::Shader::OPCODE_IMUL; 615 case EbtFloat: 616 default: 617 return op; 618 } 619 case sw::Shader::OPCODE_DIV: 620 switch(baseType) 621 { 622 case EbtInt: 623 return sw::Shader::OPCODE_IDIV; 624 case EbtUInt: 625 return sw::Shader::OPCODE_UDIV; 626 case EbtFloat: 627 default: 628 return op; 629 } 630 case sw::Shader::OPCODE_IMOD: 631 return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op; 632 case sw::Shader::OPCODE_ISHR: 633 return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op; 634 case sw::Shader::OPCODE_MIN: 635 switch(baseType) 636 { 637 case EbtInt: 638 return sw::Shader::OPCODE_IMIN; 639 case EbtUInt: 640 return sw::Shader::OPCODE_UMIN; 641 case EbtFloat: 642 default: 643 return op; 644 } 645 case sw::Shader::OPCODE_MAX: 646 switch(baseType) 647 { 648 case EbtInt: 649 return sw::Shader::OPCODE_IMAX; 650 case EbtUInt: 651 return sw::Shader::OPCODE_UMAX; 652 case EbtFloat: 653 default: 654 return op; 655 } 656 default: 657 return op; 658 } 659 } 660 visitSymbol(TIntermSymbol * symbol)661 void OutputASM::visitSymbol(TIntermSymbol *symbol) 662 { 663 // The type of vertex outputs and fragment inputs with the same name must match (validated at link time), 664 // so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code). 665 switch(symbol->getQualifier()) 666 { 667 case EvqVaryingIn: 668 case EvqVaryingOut: 669 case EvqInvariantVaryingIn: 670 case EvqInvariantVaryingOut: 671 case EvqVertexOut: 672 case EvqFragmentIn: 673 if(symbol->getBasicType() != EbtInvariant) // Typeless declarations are not new varyings 674 { 675 declareVarying(symbol, -1); 676 } 677 break; 678 case EvqFragmentOut: 679 declareFragmentOutput(symbol); 680 break; 681 default: 682 break; 683 } 684 685 TInterfaceBlock* block = symbol->getType().getInterfaceBlock(); 686 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables: 687 // "All members of a named uniform block declared with a shared or std140 layout qualifier 688 // are considered active, even if they are not referenced in any shader in the program. 689 // The uniform block itself is also considered active, even if no member of the block is referenced." 690 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140))) 691 { 692 uniformRegister(symbol); 693 } 694 } 695 visitBinary(Visit visit,TIntermBinary * node)696 bool OutputASM::visitBinary(Visit visit, TIntermBinary *node) 697 { 698 if(currentScope != emitScope) 699 { 700 return false; 701 } 702 703 TIntermTyped *result = node; 704 TIntermTyped *left = node->getLeft(); 705 TIntermTyped *right = node->getRight(); 706 const TType &leftType = left->getType(); 707 const TType &rightType = right->getType(); 708 709 if(isSamplerRegister(result)) 710 { 711 return false; // Don't traverse, the register index is determined statically 712 } 713 714 switch(node->getOp()) 715 { 716 case EOpAssign: 717 assert(visit == PreVisit); 718 right->traverse(this); 719 assignLvalue(left, right); 720 copy(result, right); 721 return false; 722 case EOpInitialize: 723 assert(visit == PreVisit); 724 // Constant arrays go into the constant register file. 725 if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1) 726 { 727 for(int i = 0; i < left->totalRegisterCount(); i++) 728 { 729 emit(sw::Shader::OPCODE_DEF, left, i, right, i); 730 } 731 } 732 else 733 { 734 right->traverse(this); 735 copy(left, right); 736 } 737 return false; 738 case EOpMatrixTimesScalarAssign: 739 assert(visit == PreVisit); 740 right->traverse(this); 741 for(int i = 0; i < leftType.getNominalSize(); i++) 742 { 743 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right); 744 } 745 746 assignLvalue(left, result); 747 return false; 748 case EOpVectorTimesMatrixAssign: 749 assert(visit == PreVisit); 750 { 751 // The left operand may contain a swizzle serving double-duty as 752 // swizzle and writemask, so it's important that we traverse it 753 // first. Otherwise we may end up never setting up our left 754 // operand correctly. 755 left->traverse(this); 756 right->traverse(this); 757 int size = leftType.getNominalSize(); 758 759 for(int i = 0; i < size; i++) 760 { 761 Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i); 762 dot->dst.mask = 1 << i; 763 } 764 765 assignLvalue(left, result); 766 } 767 return false; 768 case EOpMatrixTimesMatrixAssign: 769 assert(visit == PreVisit); 770 { 771 right->traverse(this); 772 int dim = leftType.getNominalSize(); 773 774 for(int i = 0; i < dim; i++) 775 { 776 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 777 mul->src[1].swizzle = 0x00; 778 779 for(int j = 1; j < dim; j++) 780 { 781 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 782 mad->src[1].swizzle = j * 0x55; 783 } 784 } 785 786 assignLvalue(left, result); 787 } 788 return false; 789 case EOpIndexDirect: 790 case EOpIndexIndirect: 791 case EOpIndexDirectStruct: 792 case EOpIndexDirectInterfaceBlock: 793 assert(visit == PreVisit); 794 evaluateRvalue(node); 795 return false; 796 case EOpVectorSwizzle: 797 if(visit == PostVisit) 798 { 799 int swizzle = 0; 800 TIntermAggregate *components = right->getAsAggregate(); 801 802 if(components) 803 { 804 TIntermSequence &sequence = components->getSequence(); 805 int component = 0; 806 807 for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++) 808 { 809 TIntermConstantUnion *element = (*sit)->getAsConstantUnion(); 810 811 if(element) 812 { 813 int i = element->getUnionArrayPointer()[0].getIConst(); 814 swizzle |= i << (component * 2); 815 component++; 816 } 817 else UNREACHABLE(0); 818 } 819 } 820 else UNREACHABLE(0); 821 822 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left); 823 mov->src[0].swizzle = swizzle; 824 } 825 break; 826 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break; 827 case EOpAdd: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right); break; 828 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break; 829 case EOpSub: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right); break; 830 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break; 831 case EOpMul: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right); break; 832 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break; 833 case EOpDiv: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right); break; 834 case EOpIModAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break; 835 case EOpIMod: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right); break; 836 case EOpBitShiftLeftAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break; 837 case EOpBitShiftLeft: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right); break; 838 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break; 839 case EOpBitShiftRight: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right); break; 840 case EOpBitwiseAndAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break; 841 case EOpBitwiseAnd: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right); break; 842 case EOpBitwiseXorAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break; 843 case EOpBitwiseXor: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right); break; 844 case EOpBitwiseOrAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right); break; 845 case EOpBitwiseOr: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right); break; 846 case EOpEqual: 847 if(visit == PostVisit) 848 { 849 emitBinary(sw::Shader::OPCODE_EQ, result, left, right); 850 851 for(int index = 1; index < left->totalRegisterCount(); index++) 852 { 853 Temporary equal(this); 854 emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index); 855 emit(sw::Shader::OPCODE_AND, result, result, &equal); 856 } 857 } 858 break; 859 case EOpNotEqual: 860 if(visit == PostVisit) 861 { 862 emitBinary(sw::Shader::OPCODE_NE, result, left, right); 863 864 for(int index = 1; index < left->totalRegisterCount(); index++) 865 { 866 Temporary notEqual(this); 867 emit(sw::Shader::OPCODE_NE, ¬Equal, 0, left, index, right, index); 868 emit(sw::Shader::OPCODE_OR, result, result, ¬Equal); 869 } 870 } 871 break; 872 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break; 873 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break; 874 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break; 875 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break; 876 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break; 877 case EOpVectorTimesScalar: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break; 878 case EOpMatrixTimesScalar: 879 if(visit == PostVisit) 880 { 881 if(left->isMatrix()) 882 { 883 for(int i = 0; i < leftType.getNominalSize(); i++) 884 { 885 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0); 886 } 887 } 888 else if(right->isMatrix()) 889 { 890 for(int i = 0; i < rightType.getNominalSize(); i++) 891 { 892 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 893 } 894 } 895 else UNREACHABLE(0); 896 } 897 break; 898 case EOpVectorTimesMatrix: 899 if(visit == PostVisit) 900 { 901 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize()); 902 903 int size = rightType.getNominalSize(); 904 for(int i = 0; i < size; i++) 905 { 906 Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i); 907 dot->dst.mask = 1 << i; 908 } 909 } 910 break; 911 case EOpMatrixTimesVector: 912 if(visit == PostVisit) 913 { 914 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right); 915 mul->src[1].swizzle = 0x00; 916 917 int size = rightType.getNominalSize(); 918 for(int i = 1; i < size; i++) 919 { 920 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result); 921 mad->src[1].swizzle = i * 0x55; 922 } 923 } 924 break; 925 case EOpMatrixTimesMatrix: 926 if(visit == PostVisit) 927 { 928 int dim = leftType.getNominalSize(); 929 930 int size = rightType.getNominalSize(); 931 for(int i = 0; i < size; i++) 932 { 933 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 934 mul->src[1].swizzle = 0x00; 935 936 for(int j = 1; j < dim; j++) 937 { 938 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 939 mad->src[1].swizzle = j * 0x55; 940 } 941 } 942 } 943 break; 944 case EOpLogicalOr: 945 if(trivial(right, 6)) 946 { 947 if(visit == PostVisit) 948 { 949 emit(sw::Shader::OPCODE_OR, result, left, right); 950 } 951 } 952 else // Short-circuit evaluation 953 { 954 if(visit == InVisit) 955 { 956 emit(sw::Shader::OPCODE_MOV, result, left); 957 Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result); 958 ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT; 959 } 960 else if(visit == PostVisit) 961 { 962 emit(sw::Shader::OPCODE_MOV, result, right); 963 emit(sw::Shader::OPCODE_ENDIF); 964 } 965 } 966 break; 967 case EOpLogicalXor: if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break; 968 case EOpLogicalAnd: 969 if(trivial(right, 6)) 970 { 971 if(visit == PostVisit) 972 { 973 emit(sw::Shader::OPCODE_AND, result, left, right); 974 } 975 } 976 else // Short-circuit evaluation 977 { 978 if(visit == InVisit) 979 { 980 emit(sw::Shader::OPCODE_MOV, result, left); 981 emit(sw::Shader::OPCODE_IF, 0, result); 982 } 983 else if(visit == PostVisit) 984 { 985 emit(sw::Shader::OPCODE_MOV, result, right); 986 emit(sw::Shader::OPCODE_ENDIF); 987 } 988 } 989 break; 990 default: UNREACHABLE(node->getOp()); 991 } 992 993 return true; 994 } 995 emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)996 void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow) 997 { 998 switch(size) 999 { 1000 case 1: // Used for cofactor computation only 1001 { 1002 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1003 bool isMov = (row == col); 1004 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG; 1005 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row); 1006 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col); 1007 mov->dst.mask = 1 << outRow; 1008 } 1009 break; 1010 case 2: 1011 { 1012 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy 1013 1014 bool isCofactor = (col >= 0) && (row >= 0); 1015 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 1016 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 1017 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 1018 1019 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1); 1020 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2]; 1021 det->dst.mask = 1 << outRow; 1022 } 1023 break; 1024 case 3: 1025 { 1026 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw 1027 1028 bool isCofactor = (col >= 0) && (row >= 0); 1029 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 1030 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 1031 int col2 = (isCofactor && (col <= 2)) ? 3 : 2; 1032 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 1033 1034 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2); 1035 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3]; 1036 det->dst.mask = 1 << outRow; 1037 } 1038 break; 1039 case 4: 1040 { 1041 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3); 1042 det->dst.mask = 1 << outRow; 1043 } 1044 break; 1045 default: 1046 UNREACHABLE(size); 1047 break; 1048 } 1049 } 1050 visitUnary(Visit visit,TIntermUnary * node)1051 bool OutputASM::visitUnary(Visit visit, TIntermUnary *node) 1052 { 1053 if(currentScope != emitScope) 1054 { 1055 return false; 1056 } 1057 1058 TIntermTyped *result = node; 1059 TIntermTyped *arg = node->getOperand(); 1060 TBasicType basicType = arg->getType().getBasicType(); 1061 1062 union 1063 { 1064 float f; 1065 int i; 1066 } one_value; 1067 1068 if(basicType == EbtInt || basicType == EbtUInt) 1069 { 1070 one_value.i = 1; 1071 } 1072 else 1073 { 1074 one_value.f = 1.0f; 1075 } 1076 1077 Constant one(one_value.f, one_value.f, one_value.f, one_value.f); 1078 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f); 1079 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f); 1080 1081 switch(node->getOp()) 1082 { 1083 case EOpNegative: 1084 if(visit == PostVisit) 1085 { 1086 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg); 1087 for(int index = 0; index < arg->totalRegisterCount(); index++) 1088 { 1089 emit(negOpcode, result, index, arg, index); 1090 } 1091 } 1092 break; 1093 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1094 case EOpLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1095 case EOpBitwiseNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 1096 case EOpPostIncrement: 1097 if(visit == PostVisit) 1098 { 1099 copy(result, arg); 1100 1101 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 1102 for(int index = 0; index < arg->totalRegisterCount(); index++) 1103 { 1104 emit(addOpcode, arg, index, arg, index, &one); 1105 } 1106 1107 assignLvalue(arg, arg); 1108 } 1109 break; 1110 case EOpPostDecrement: 1111 if(visit == PostVisit) 1112 { 1113 copy(result, arg); 1114 1115 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 1116 for(int index = 0; index < arg->totalRegisterCount(); index++) 1117 { 1118 emit(subOpcode, arg, index, arg, index, &one); 1119 } 1120 1121 assignLvalue(arg, arg); 1122 } 1123 break; 1124 case EOpPreIncrement: 1125 if(visit == PostVisit) 1126 { 1127 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 1128 for(int index = 0; index < arg->totalRegisterCount(); index++) 1129 { 1130 emit(addOpcode, result, index, arg, index, &one); 1131 } 1132 1133 assignLvalue(arg, result); 1134 } 1135 break; 1136 case EOpPreDecrement: 1137 if(visit == PostVisit) 1138 { 1139 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 1140 for(int index = 0; index < arg->totalRegisterCount(); index++) 1141 { 1142 emit(subOpcode, result, index, arg, index, &one); 1143 } 1144 1145 assignLvalue(arg, result); 1146 } 1147 break; 1148 case EOpRadians: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break; 1149 case EOpDegrees: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, °); break; 1150 case EOpSin: if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break; 1151 case EOpCos: if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break; 1152 case EOpTan: if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break; 1153 case EOpAsin: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break; 1154 case EOpAcos: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break; 1155 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break; 1156 case EOpSinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break; 1157 case EOpCosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break; 1158 case EOpTanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break; 1159 case EOpAsinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break; 1160 case EOpAcosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break; 1161 case EOpAtanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break; 1162 case EOpExp: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break; 1163 case EOpLog: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break; 1164 case EOpExp2: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break; 1165 case EOpLog2: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break; 1166 case EOpSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break; 1167 case EOpInverseSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break; 1168 case EOpAbs: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break; 1169 case EOpSign: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break; 1170 case EOpFloor: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break; 1171 case EOpTrunc: if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break; 1172 case EOpRound: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break; 1173 case EOpRoundEven: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break; 1174 case EOpCeil: if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break; 1175 case EOpFract: if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break; 1176 case EOpIsNan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break; 1177 case EOpIsInf: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break; 1178 case EOpLength: if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break; 1179 case EOpNormalize: if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break; 1180 case EOpDFdx: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break; 1181 case EOpDFdy: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break; 1182 case EOpFwidth: if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break; 1183 case EOpAny: if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break; 1184 case EOpAll: if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break; 1185 case EOpFloatBitsToInt: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break; 1186 case EOpFloatBitsToUint: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break; 1187 case EOpIntBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break; 1188 case EOpUintBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break; 1189 case EOpPackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break; 1190 case EOpPackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break; 1191 case EOpPackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break; 1192 case EOpUnpackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break; 1193 case EOpUnpackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break; 1194 case EOpUnpackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break; 1195 case EOpTranspose: 1196 if(visit == PostVisit) 1197 { 1198 int numCols = arg->getNominalSize(); 1199 int numRows = arg->getSecondarySize(); 1200 for(int i = 0; i < numCols; ++i) 1201 { 1202 for(int j = 0; j < numRows; ++j) 1203 { 1204 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i); 1205 mov->src[0].swizzle = 0x55 * j; 1206 mov->dst.mask = 1 << i; 1207 } 1208 } 1209 } 1210 break; 1211 case EOpDeterminant: 1212 if(visit == PostVisit) 1213 { 1214 int size = arg->getNominalSize(); 1215 ASSERT(size == arg->getSecondarySize()); 1216 1217 emitDeterminant(result, arg, size); 1218 } 1219 break; 1220 case EOpInverse: 1221 if(visit == PostVisit) 1222 { 1223 int size = arg->getNominalSize(); 1224 ASSERT(size == arg->getSecondarySize()); 1225 1226 // Compute transposed matrix of cofactors 1227 for(int i = 0; i < size; ++i) 1228 { 1229 for(int j = 0; j < size; ++j) 1230 { 1231 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1232 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant 1233 emitDeterminant(result, arg, size - 1, j, i, i, j); 1234 } 1235 } 1236 1237 // Compute 1 / determinant 1238 Temporary invDet(this); 1239 emitDeterminant(&invDet, arg, size); 1240 Constant one(1.0f, 1.0f, 1.0f, 1.0f); 1241 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet); 1242 div->src[1].swizzle = 0x00; // xxxx 1243 1244 // Divide transposed matrix of cofactors by determinant 1245 for(int i = 0; i < size; ++i) 1246 { 1247 emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet); 1248 } 1249 } 1250 break; 1251 default: UNREACHABLE(node->getOp()); 1252 } 1253 1254 return true; 1255 } 1256 visitAggregate(Visit visit,TIntermAggregate * node)1257 bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node) 1258 { 1259 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence) 1260 { 1261 return false; 1262 } 1263 1264 Constant zero(0.0f, 0.0f, 0.0f, 0.0f); 1265 1266 TIntermTyped *result = node; 1267 const TType &resultType = node->getType(); 1268 TIntermSequence &arg = node->getSequence(); 1269 int argumentCount = static_cast<int>(arg.size()); 1270 1271 switch(node->getOp()) 1272 { 1273 case EOpSequence: break; 1274 case EOpDeclaration: break; 1275 case EOpInvariantDeclaration: break; 1276 case EOpPrototype: break; 1277 case EOpComma: 1278 if(visit == PostVisit) 1279 { 1280 copy(result, arg[1]); 1281 } 1282 break; 1283 case EOpFunction: 1284 if(visit == PreVisit) 1285 { 1286 const TString &name = node->getName(); 1287 1288 if(emitScope == FUNCTION) 1289 { 1290 if(functionArray.size() > 1) // No need for a label when there's only main() 1291 { 1292 Instruction *label = emit(sw::Shader::OPCODE_LABEL); 1293 label->dst.type = sw::Shader::PARAMETER_LABEL; 1294 1295 const Function *function = findFunction(name); 1296 ASSERT(function); // Should have been added during global pass 1297 label->dst.index = function->label; 1298 currentFunction = function->label; 1299 } 1300 } 1301 else if(emitScope == GLOBAL) 1302 { 1303 if(name != "main(") 1304 { 1305 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence(); 1306 functionArray.push_back(Function(functionArray.size(), name, &arguments, node)); 1307 } 1308 } 1309 else UNREACHABLE(emitScope); 1310 1311 currentScope = FUNCTION; 1312 } 1313 else if(visit == PostVisit) 1314 { 1315 if(emitScope == FUNCTION) 1316 { 1317 if(functionArray.size() > 1) // No need to return when there's only main() 1318 { 1319 emit(sw::Shader::OPCODE_RET); 1320 } 1321 } 1322 1323 currentScope = GLOBAL; 1324 } 1325 break; 1326 case EOpFunctionCall: 1327 if(visit == PostVisit) 1328 { 1329 if(node->isUserDefined()) 1330 { 1331 const TString &name = node->getName(); 1332 const Function *function = findFunction(name); 1333 1334 if(!function) 1335 { 1336 mContext.error(node->getLine(), "function definition not found", name.c_str()); 1337 return false; 1338 } 1339 1340 TIntermSequence &arguments = *function->arg; 1341 1342 for(int i = 0; i < argumentCount; i++) 1343 { 1344 TIntermTyped *in = arguments[i]->getAsTyped(); 1345 1346 if(in->getQualifier() == EvqIn || 1347 in->getQualifier() == EvqInOut || 1348 in->getQualifier() == EvqConstReadOnly) 1349 { 1350 copy(in, arg[i]); 1351 } 1352 } 1353 1354 Instruction *call = emit(sw::Shader::OPCODE_CALL); 1355 call->dst.type = sw::Shader::PARAMETER_LABEL; 1356 call->dst.index = function->label; 1357 1358 if(function->ret && function->ret->getType().getBasicType() != EbtVoid) 1359 { 1360 copy(result, function->ret); 1361 } 1362 1363 for(int i = 0; i < argumentCount; i++) 1364 { 1365 TIntermTyped *argument = arguments[i]->getAsTyped(); 1366 TIntermTyped *out = arg[i]->getAsTyped(); 1367 1368 if(argument->getQualifier() == EvqOut || 1369 argument->getQualifier() == EvqInOut) 1370 { 1371 assignLvalue(out, argument); 1372 } 1373 } 1374 } 1375 else 1376 { 1377 const TextureFunction textureFunction(node->getName()); 1378 TIntermTyped *s = arg[0]->getAsTyped(); 1379 TIntermTyped *t = arg[1]->getAsTyped(); 1380 1381 Temporary coord(this); 1382 1383 if(textureFunction.proj) 1384 { 1385 Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]); 1386 rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1); 1387 rcp->dst.mask = 0x7; 1388 1389 Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord); 1390 mul->dst.mask = 0x7; 1391 1392 if(IsShadowSampler(s->getBasicType())) 1393 { 1394 ASSERT(s->getBasicType() == EbtSampler2DShadow); 1395 Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord); 1396 mov->src[0].swizzle = 0xA4; 1397 } 1398 } 1399 else 1400 { 1401 Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]); 1402 1403 if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3) 1404 { 1405 ASSERT(s->getBasicType() == EbtSampler2DShadow); 1406 mov->src[0].swizzle = 0xA4; 1407 } 1408 } 1409 1410 switch(textureFunction.method) 1411 { 1412 case TextureFunction::IMPLICIT: 1413 if(!textureFunction.offset) 1414 { 1415 if(argumentCount == 2) 1416 { 1417 emit(sw::Shader::OPCODE_TEX, result, &coord, s); 1418 } 1419 else if(argumentCount == 3) // Bias 1420 { 1421 emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]); 1422 } 1423 else UNREACHABLE(argumentCount); 1424 } 1425 else // Offset 1426 { 1427 if(argumentCount == 3) 1428 { 1429 emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]); 1430 } 1431 else if(argumentCount == 4) // Bias 1432 { 1433 emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]); 1434 } 1435 else UNREACHABLE(argumentCount); 1436 } 1437 break; 1438 case TextureFunction::LOD: 1439 if(!textureFunction.offset && argumentCount == 3) 1440 { 1441 emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]); 1442 } 1443 else if(argumentCount == 4) // Offset 1444 { 1445 emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]); 1446 } 1447 else UNREACHABLE(argumentCount); 1448 break; 1449 case TextureFunction::FETCH: 1450 if(!textureFunction.offset && argumentCount == 3) 1451 { 1452 emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]); 1453 } 1454 else if(argumentCount == 4) // Offset 1455 { 1456 emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]); 1457 } 1458 else UNREACHABLE(argumentCount); 1459 break; 1460 case TextureFunction::GRAD: 1461 if(!textureFunction.offset && argumentCount == 4) 1462 { 1463 emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]); 1464 } 1465 else if(argumentCount == 5) // Offset 1466 { 1467 emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]); 1468 } 1469 else UNREACHABLE(argumentCount); 1470 break; 1471 case TextureFunction::SIZE: 1472 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s); 1473 break; 1474 default: 1475 UNREACHABLE(textureFunction.method); 1476 } 1477 } 1478 } 1479 break; 1480 case EOpParameters: 1481 break; 1482 case EOpConstructFloat: 1483 case EOpConstructVec2: 1484 case EOpConstructVec3: 1485 case EOpConstructVec4: 1486 case EOpConstructBool: 1487 case EOpConstructBVec2: 1488 case EOpConstructBVec3: 1489 case EOpConstructBVec4: 1490 case EOpConstructInt: 1491 case EOpConstructIVec2: 1492 case EOpConstructIVec3: 1493 case EOpConstructIVec4: 1494 case EOpConstructUInt: 1495 case EOpConstructUVec2: 1496 case EOpConstructUVec3: 1497 case EOpConstructUVec4: 1498 if(visit == PostVisit) 1499 { 1500 int component = 0; 1501 int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0; 1502 int arrayComponents = result->getType().getElementSize(); 1503 for(int i = 0; i < argumentCount; i++) 1504 { 1505 TIntermTyped *argi = arg[i]->getAsTyped(); 1506 int size = argi->getNominalSize(); 1507 int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex); 1508 int swizzle = component - (arrayIndex * arrayComponents); 1509 1510 if(!argi->isMatrix()) 1511 { 1512 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1513 mov->dst.mask = (0xF << swizzle) & 0xF; 1514 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1515 1516 component += size; 1517 } 1518 else if(!result->isMatrix()) // Construct a non matrix from a matrix 1519 { 1520 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1521 mov->dst.mask = (0xF << swizzle) & 0xF; 1522 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1523 1524 // At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3 1525 if(result->getNominalSize() > size) 1526 { 1527 Instruction *mov = emitCast(result, arrayIndex, argi, 1); 1528 mov->dst.mask = (0xF << (swizzle + size)) & 0xF; 1529 // mat2: xxxy (0x40), mat3: xxxx (0x00) 1530 mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2); 1531 } 1532 1533 component += size; 1534 } 1535 else // Matrix 1536 { 1537 int column = 0; 1538 1539 while(component < resultType.getNominalSize()) 1540 { 1541 Instruction *mov = emitCast(result, arrayIndex, argi, column); 1542 mov->dst.mask = (0xF << swizzle) & 0xF; 1543 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1544 1545 column++; 1546 component += size; 1547 } 1548 } 1549 } 1550 } 1551 break; 1552 case EOpConstructMat2: 1553 case EOpConstructMat2x3: 1554 case EOpConstructMat2x4: 1555 case EOpConstructMat3x2: 1556 case EOpConstructMat3: 1557 case EOpConstructMat3x4: 1558 case EOpConstructMat4x2: 1559 case EOpConstructMat4x3: 1560 case EOpConstructMat4: 1561 if(visit == PostVisit) 1562 { 1563 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1564 const int outCols = result->getNominalSize(); 1565 const int outRows = result->getSecondarySize(); 1566 1567 if(arg0->isScalar() && arg.size() == 1) // Construct scale matrix 1568 { 1569 for(int i = 0; i < outCols; i++) 1570 { 1571 emit(sw::Shader::OPCODE_MOV, result, i, &zero); 1572 if (i < outRows) 1573 { 1574 // Insert the scalar value on the main diagonal. 1575 // For non-square matrices, Avoid emitting in 1576 // a column which doesn't /have/ a main diagonal 1577 // element, even though it would be fairly benign -- 1578 // it's not necessarily trivial for downstream 1579 // passes to see that this is redundant and strip it 1580 // out. 1581 Instruction *mov = emitCast(result, i, arg0, 0); 1582 mov->dst.mask = 1 << i; 1583 ASSERT(mov->src[0].swizzle == 0x00); 1584 } 1585 } 1586 } 1587 else if(arg0->isMatrix()) 1588 { 1589 int arraySize = result->isArray() ? result->getArraySize() : 1; 1590 1591 for(int n = 0; n < arraySize; n++) 1592 { 1593 TIntermTyped *argi = arg[n]->getAsTyped(); 1594 const int inCols = argi->getNominalSize(); 1595 const int inRows = argi->getSecondarySize(); 1596 1597 for(int i = 0; i < outCols; i++) 1598 { 1599 if(i >= inCols || outRows > inRows) 1600 { 1601 // Initialize to identity matrix 1602 Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f)); 1603 emitCast(result, i + n * outCols, &col, 0); 1604 } 1605 1606 if(i < inCols) 1607 { 1608 Instruction *mov = emitCast(result, i + n * outCols, argi, i); 1609 mov->dst.mask = 0xF >> (4 - inRows); 1610 } 1611 } 1612 } 1613 } 1614 else 1615 { 1616 int column = 0; 1617 int row = 0; 1618 1619 for(int i = 0; i < argumentCount; i++) 1620 { 1621 TIntermTyped *argi = arg[i]->getAsTyped(); 1622 int size = argi->getNominalSize(); 1623 int element = 0; 1624 1625 while(element < size) 1626 { 1627 Instruction *mov = emitCast(result, column, argi, 0); 1628 mov->dst.mask = (0xF << row) & 0xF; 1629 mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element; 1630 1631 int end = row + size - element; 1632 column = end >= outRows ? column + 1 : column; 1633 element = element + outRows - row; 1634 row = end >= outRows ? 0 : end; 1635 } 1636 } 1637 } 1638 } 1639 break; 1640 case EOpConstructStruct: 1641 if(visit == PostVisit) 1642 { 1643 int offset = 0; 1644 for(int i = 0; i < argumentCount; i++) 1645 { 1646 TIntermTyped *argi = arg[i]->getAsTyped(); 1647 int size = argi->totalRegisterCount(); 1648 1649 for(int index = 0; index < size; index++) 1650 { 1651 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index); 1652 mov->dst.mask = writeMask(result, offset + index); 1653 } 1654 1655 offset += size; 1656 } 1657 } 1658 break; 1659 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break; 1660 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break; 1661 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break; 1662 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break; 1663 case EOpVectorEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break; 1664 case EOpVectorNotEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break; 1665 case EOpMod: if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break; 1666 case EOpModf: 1667 if(visit == PostVisit) 1668 { 1669 TIntermTyped* arg1 = arg[1]->getAsTyped(); 1670 emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]); 1671 assignLvalue(arg1, arg1); 1672 emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1); 1673 } 1674 break; 1675 case EOpPow: if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break; 1676 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break; 1677 case EOpMin: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break; 1678 case EOpMax: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break; 1679 case EOpClamp: 1680 if(visit == PostVisit) 1681 { 1682 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); 1683 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]); 1684 } 1685 break; 1686 case EOpMix: 1687 if(visit == PostVisit) 1688 { 1689 if(arg[2]->getAsTyped()->getBasicType() == EbtBool) 1690 { 1691 emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]); 1692 } 1693 else 1694 { 1695 emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); 1696 } 1697 } 1698 break; 1699 case EOpStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break; 1700 case EOpSmoothStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break; 1701 case EOpDistance: if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break; 1702 case EOpDot: if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break; 1703 case EOpCross: if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break; 1704 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1705 case EOpReflect: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break; 1706 case EOpRefract: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1707 case EOpMul: 1708 if(visit == PostVisit) 1709 { 1710 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1711 ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) && 1712 (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize())); 1713 1714 int size = arg0->getNominalSize(); 1715 for(int i = 0; i < size; i++) 1716 { 1717 emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i); 1718 } 1719 } 1720 break; 1721 case EOpOuterProduct: 1722 if(visit == PostVisit) 1723 { 1724 for(int i = 0; i < dim(arg[1]); i++) 1725 { 1726 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]); 1727 mul->src[1].swizzle = 0x55 * i; 1728 } 1729 } 1730 break; 1731 default: UNREACHABLE(node->getOp()); 1732 } 1733 1734 return true; 1735 } 1736 visitSelection(Visit visit,TIntermSelection * node)1737 bool OutputASM::visitSelection(Visit visit, TIntermSelection *node) 1738 { 1739 if(currentScope != emitScope) 1740 { 1741 return false; 1742 } 1743 1744 TIntermTyped *condition = node->getCondition(); 1745 TIntermNode *trueBlock = node->getTrueBlock(); 1746 TIntermNode *falseBlock = node->getFalseBlock(); 1747 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 1748 1749 condition->traverse(this); 1750 1751 if(node->usesTernaryOperator()) 1752 { 1753 if(constantCondition) 1754 { 1755 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1756 1757 if(trueCondition) 1758 { 1759 trueBlock->traverse(this); 1760 copy(node, trueBlock); 1761 } 1762 else 1763 { 1764 falseBlock->traverse(this); 1765 copy(node, falseBlock); 1766 } 1767 } 1768 else if(trivial(node, 6)) // Fast to compute both potential results and no side effects 1769 { 1770 trueBlock->traverse(this); 1771 falseBlock->traverse(this); 1772 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock); 1773 } 1774 else 1775 { 1776 emit(sw::Shader::OPCODE_IF, 0, condition); 1777 1778 if(trueBlock) 1779 { 1780 trueBlock->traverse(this); 1781 copy(node, trueBlock); 1782 } 1783 1784 if(falseBlock) 1785 { 1786 emit(sw::Shader::OPCODE_ELSE); 1787 falseBlock->traverse(this); 1788 copy(node, falseBlock); 1789 } 1790 1791 emit(sw::Shader::OPCODE_ENDIF); 1792 } 1793 } 1794 else // if/else statement 1795 { 1796 if(constantCondition) 1797 { 1798 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1799 1800 if(trueCondition) 1801 { 1802 if(trueBlock) 1803 { 1804 trueBlock->traverse(this); 1805 } 1806 } 1807 else 1808 { 1809 if(falseBlock) 1810 { 1811 falseBlock->traverse(this); 1812 } 1813 } 1814 } 1815 else 1816 { 1817 emit(sw::Shader::OPCODE_IF, 0, condition); 1818 1819 if(trueBlock) 1820 { 1821 trueBlock->traverse(this); 1822 } 1823 1824 if(falseBlock) 1825 { 1826 emit(sw::Shader::OPCODE_ELSE); 1827 falseBlock->traverse(this); 1828 } 1829 1830 emit(sw::Shader::OPCODE_ENDIF); 1831 } 1832 } 1833 1834 return false; 1835 } 1836 visitLoop(Visit visit,TIntermLoop * node)1837 bool OutputASM::visitLoop(Visit visit, TIntermLoop *node) 1838 { 1839 if(currentScope != emitScope) 1840 { 1841 return false; 1842 } 1843 1844 LoopInfo loop(node); 1845 1846 if(loop.iterations == 0) 1847 { 1848 return false; 1849 } 1850 1851 bool unroll = (loop.iterations <= 4); 1852 1853 TIntermNode *init = node->getInit(); 1854 TIntermTyped *condition = node->getCondition(); 1855 TIntermTyped *expression = node->getExpression(); 1856 TIntermNode *body = node->getBody(); 1857 Constant True(true); 1858 1859 if(loop.isDeterministic()) 1860 { 1861 deterministicVariables.insert(loop.index->getId()); 1862 1863 if(!unroll) 1864 { 1865 emit(sw::Shader::OPCODE_SCALAR); // Unrolled loops don't have an ENDWHILE to disable scalar mode. 1866 } 1867 } 1868 1869 if(node->getType() == ELoopDoWhile) 1870 { 1871 Temporary iterate(this); 1872 emit(sw::Shader::OPCODE_MOV, &iterate, &True); 1873 1874 emit(sw::Shader::OPCODE_WHILE, 0, &iterate); // FIXME: Implement real do-while 1875 1876 if(body) 1877 { 1878 body->traverse(this); 1879 } 1880 1881 emit(sw::Shader::OPCODE_TEST); 1882 1883 condition->traverse(this); 1884 emit(sw::Shader::OPCODE_MOV, &iterate, condition); 1885 1886 emit(sw::Shader::OPCODE_ENDWHILE); 1887 } 1888 else 1889 { 1890 if(init) 1891 { 1892 init->traverse(this); 1893 } 1894 1895 if(unroll) 1896 { 1897 mContext.info(node->getLine(), "loop unrolled", "for"); 1898 1899 for(unsigned int i = 0; i < loop.iterations; i++) 1900 { 1901 // condition->traverse(this); // Condition could contain statements, but not in an unrollable loop 1902 1903 if(body) 1904 { 1905 body->traverse(this); 1906 } 1907 1908 if(expression) 1909 { 1910 expression->traverse(this); 1911 } 1912 } 1913 } 1914 else 1915 { 1916 if(condition) 1917 { 1918 condition->traverse(this); 1919 } 1920 else 1921 { 1922 condition = &True; 1923 } 1924 1925 emit(sw::Shader::OPCODE_WHILE, 0, condition); 1926 1927 if(body) 1928 { 1929 body->traverse(this); 1930 } 1931 1932 emit(sw::Shader::OPCODE_TEST); 1933 1934 if(loop.isDeterministic()) 1935 { 1936 emit(sw::Shader::OPCODE_SCALAR); 1937 } 1938 1939 if(expression) 1940 { 1941 expression->traverse(this); 1942 } 1943 1944 if(condition) 1945 { 1946 condition->traverse(this); 1947 } 1948 1949 emit(sw::Shader::OPCODE_ENDWHILE); 1950 } 1951 } 1952 1953 if(loop.isDeterministic()) 1954 { 1955 deterministicVariables.erase(loop.index->getId()); 1956 } 1957 1958 return false; 1959 } 1960 visitBranch(Visit visit,TIntermBranch * node)1961 bool OutputASM::visitBranch(Visit visit, TIntermBranch *node) 1962 { 1963 if(currentScope != emitScope) 1964 { 1965 return false; 1966 } 1967 1968 switch(node->getFlowOp()) 1969 { 1970 case EOpKill: if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD); break; 1971 case EOpBreak: if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK); break; 1972 case EOpContinue: if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break; 1973 case EOpReturn: 1974 if(visit == PostVisit) 1975 { 1976 TIntermTyped *value = node->getExpression(); 1977 1978 if(value) 1979 { 1980 copy(functionArray[currentFunction].ret, value); 1981 } 1982 1983 emit(sw::Shader::OPCODE_LEAVE); 1984 } 1985 break; 1986 default: UNREACHABLE(node->getFlowOp()); 1987 } 1988 1989 return true; 1990 } 1991 visitSwitch(Visit visit,TIntermSwitch * node)1992 bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node) 1993 { 1994 if(currentScope != emitScope) 1995 { 1996 return false; 1997 } 1998 1999 TIntermTyped* switchValue = node->getInit(); 2000 TIntermAggregate* opList = node->getStatementList(); 2001 2002 if(!switchValue || !opList) 2003 { 2004 return false; 2005 } 2006 2007 switchValue->traverse(this); 2008 2009 emit(sw::Shader::OPCODE_SWITCH); 2010 2011 TIntermSequence& sequence = opList->getSequence(); 2012 TIntermSequence::iterator it = sequence.begin(); 2013 TIntermSequence::iterator defaultIt = sequence.end(); 2014 int nbCases = 0; 2015 for(; it != sequence.end(); ++it) 2016 { 2017 TIntermCase* currentCase = (*it)->getAsCaseNode(); 2018 if(currentCase) 2019 { 2020 TIntermSequence::iterator caseIt = it; 2021 2022 TIntermTyped* condition = currentCase->getCondition(); 2023 if(condition) // non default case 2024 { 2025 if(nbCases != 0) 2026 { 2027 emit(sw::Shader::OPCODE_ELSE); 2028 } 2029 2030 condition->traverse(this); 2031 Temporary result(this); 2032 emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition); 2033 emit(sw::Shader::OPCODE_IF, 0, &result); 2034 nbCases++; 2035 2036 // Emit the code for this case and all subsequent cases until we hit a break statement. 2037 // TODO: This can repeat a lot of code for switches with many fall-through cases. 2038 for(++caseIt; caseIt != sequence.end(); ++caseIt) 2039 { 2040 (*caseIt)->traverse(this); 2041 2042 // Stop if we encounter an unconditional branch (break, continue, return, or kill). 2043 // TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}). 2044 // Note that this eliminates useless operations but shouldn't affect correctness. 2045 if((*caseIt)->getAsBranchNode()) 2046 { 2047 break; 2048 } 2049 } 2050 } 2051 else 2052 { 2053 defaultIt = it; // The default case might not be the last case, keep it for last 2054 } 2055 } 2056 } 2057 2058 // If there's a default case, traverse it here 2059 if(defaultIt != sequence.end()) 2060 { 2061 if(nbCases != 0) 2062 { 2063 emit(sw::Shader::OPCODE_ELSE); 2064 } 2065 2066 for(++defaultIt; defaultIt != sequence.end(); ++defaultIt) 2067 { 2068 (*defaultIt)->traverse(this); 2069 if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return 2070 { 2071 break; 2072 } 2073 } 2074 } 2075 2076 for(int i = 0; i < nbCases; ++i) 2077 { 2078 emit(sw::Shader::OPCODE_ENDIF); 2079 } 2080 2081 emit(sw::Shader::OPCODE_ENDSWITCH); 2082 2083 return false; 2084 } 2085 emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)2086 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4) 2087 { 2088 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0); 2089 } 2090 emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2091 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1, 2092 TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4) 2093 { 2094 Instruction *instruction = new Instruction(op); 2095 2096 if(dst) 2097 { 2098 destination(instruction->dst, dst, dstIndex); 2099 } 2100 2101 if(src0) 2102 { 2103 TIntermTyped* src = src0->getAsTyped(); 2104 instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow); 2105 } 2106 2107 source(instruction->src[0], src0, index0); 2108 source(instruction->src[1], src1, index1); 2109 source(instruction->src[2], src2, index2); 2110 source(instruction->src[3], src3, index3); 2111 source(instruction->src[4], src4, index4); 2112 2113 shader->append(instruction); 2114 2115 return instruction; 2116 } 2117 emitCast(TIntermTyped * dst,TIntermTyped * src)2118 Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src) 2119 { 2120 return emitCast(dst, 0, src, 0); 2121 } 2122 emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2123 Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex) 2124 { 2125 switch(src->getBasicType()) 2126 { 2127 case EbtBool: 2128 switch(dst->getBasicType()) 2129 { 2130 case EbtInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 2131 case EbtUInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 2132 case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex); 2133 default: break; 2134 } 2135 break; 2136 case EbtInt: 2137 switch(dst->getBasicType()) 2138 { 2139 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 2140 case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex); 2141 default: break; 2142 } 2143 break; 2144 case EbtUInt: 2145 switch(dst->getBasicType()) 2146 { 2147 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 2148 case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex); 2149 default: break; 2150 } 2151 break; 2152 case EbtFloat: 2153 switch(dst->getBasicType()) 2154 { 2155 case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex); 2156 case EbtInt: return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex); 2157 case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex); 2158 default: break; 2159 } 2160 break; 2161 default: 2162 break; 2163 } 2164 2165 ASSERT((src->getBasicType() == dst->getBasicType()) || 2166 ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) || 2167 ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt))); 2168 2169 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex); 2170 } 2171 emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2172 void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2) 2173 { 2174 for(int index = 0; index < dst->elementRegisterCount(); index++) 2175 { 2176 emit(op, dst, index, src0, index, src1, index, src2, index); 2177 } 2178 } 2179 emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)2180 void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1) 2181 { 2182 emitBinary(op, result, src0, src1); 2183 assignLvalue(lhs, result); 2184 } 2185 emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2186 void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index) 2187 { 2188 sw::Shader::Opcode opcode; 2189 switch(left->getAsTyped()->getBasicType()) 2190 { 2191 case EbtBool: 2192 case EbtInt: 2193 opcode = sw::Shader::OPCODE_ICMP; 2194 break; 2195 case EbtUInt: 2196 opcode = sw::Shader::OPCODE_UCMP; 2197 break; 2198 default: 2199 opcode = sw::Shader::OPCODE_CMP; 2200 break; 2201 } 2202 2203 Instruction *cmp = emit(opcode, dst, 0, left, index, right, index); 2204 cmp->control = cmpOp; 2205 } 2206 componentCount(const TType & type,int registers)2207 int componentCount(const TType &type, int registers) 2208 { 2209 if(registers == 0) 2210 { 2211 return 0; 2212 } 2213 2214 if(type.isArray() && registers >= type.elementRegisterCount()) 2215 { 2216 int index = registers / type.elementRegisterCount(); 2217 registers -= index * type.elementRegisterCount(); 2218 return index * type.getElementSize() + componentCount(type, registers); 2219 } 2220 2221 if(type.isStruct() || type.isInterfaceBlock()) 2222 { 2223 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2224 int elements = 0; 2225 2226 for(const auto &field : fields) 2227 { 2228 const TType &fieldType = *(field->type()); 2229 2230 if(fieldType.totalRegisterCount() <= registers) 2231 { 2232 registers -= fieldType.totalRegisterCount(); 2233 elements += fieldType.getObjectSize(); 2234 } 2235 else // Register within this field 2236 { 2237 return elements + componentCount(fieldType, registers); 2238 } 2239 } 2240 } 2241 else if(type.isMatrix()) 2242 { 2243 return registers * type.registerSize(); 2244 } 2245 2246 UNREACHABLE(0); 2247 return 0; 2248 } 2249 registerSize(const TType & type,int registers)2250 int registerSize(const TType &type, int registers) 2251 { 2252 if(registers == 0) 2253 { 2254 if(type.isStruct()) 2255 { 2256 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0); 2257 } 2258 else if(type.isInterfaceBlock()) 2259 { 2260 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0); 2261 } 2262 2263 return type.registerSize(); 2264 } 2265 2266 if(type.isArray() && registers >= type.elementRegisterCount()) 2267 { 2268 int index = registers / type.elementRegisterCount(); 2269 registers -= index * type.elementRegisterCount(); 2270 return registerSize(type, registers); 2271 } 2272 2273 if(type.isStruct() || type.isInterfaceBlock()) 2274 { 2275 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2276 2277 for(const auto &field : fields) 2278 { 2279 const TType &fieldType = *(field->type()); 2280 2281 if(fieldType.totalRegisterCount() <= registers) 2282 { 2283 registers -= fieldType.totalRegisterCount(); 2284 } 2285 else // Register within this field 2286 { 2287 return registerSize(fieldType, registers); 2288 } 2289 } 2290 } 2291 else if(type.isMatrix()) 2292 { 2293 return registerSize(type, 0); 2294 } 2295 2296 UNREACHABLE(0); 2297 return 0; 2298 } 2299 getBlockId(TIntermTyped * arg)2300 int OutputASM::getBlockId(TIntermTyped *arg) 2301 { 2302 if(arg) 2303 { 2304 const TType &type = arg->getType(); 2305 TInterfaceBlock* block = type.getInterfaceBlock(); 2306 if(block && (type.getQualifier() == EvqUniform)) 2307 { 2308 // Make sure the uniform block is declared 2309 uniformRegister(arg); 2310 2311 const char* blockName = block->name().c_str(); 2312 2313 // Fetch uniform block index from array of blocks 2314 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it) 2315 { 2316 if(blockName == it->name) 2317 { 2318 return it->blockId; 2319 } 2320 } 2321 2322 ASSERT(false); 2323 } 2324 } 2325 2326 return -1; 2327 } 2328 getArgumentInfo(TIntermTyped * arg,int index)2329 OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index) 2330 { 2331 const TType &type = arg->getType(); 2332 int blockId = getBlockId(arg); 2333 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1); 2334 if(blockId != -1) 2335 { 2336 argumentInfo.bufferIndex = 0; 2337 for(int i = 0; i < blockId; ++i) 2338 { 2339 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize; 2340 argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1; 2341 } 2342 2343 const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId]; 2344 2345 BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end(); 2346 BlockDefinitionIndexMap::const_iterator it = itEnd; 2347 2348 argumentInfo.clampedIndex = index; 2349 if(type.isInterfaceBlock()) 2350 { 2351 // Offset index to the beginning of the selected instance 2352 int blockRegisters = type.elementRegisterCount(); 2353 int bufferOffset = argumentInfo.clampedIndex / blockRegisters; 2354 argumentInfo.bufferIndex += bufferOffset; 2355 argumentInfo.clampedIndex -= bufferOffset * blockRegisters; 2356 } 2357 2358 int regIndex = registerIndex(arg); 2359 for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i) 2360 { 2361 it = blockDefinition.find(i); 2362 if(it != itEnd) 2363 { 2364 argumentInfo.clampedIndex -= (i - regIndex); 2365 break; 2366 } 2367 } 2368 ASSERT(it != itEnd); 2369 2370 argumentInfo.typedMemberInfo = it->second; 2371 2372 int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount(); 2373 argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex; 2374 } 2375 else 2376 { 2377 argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index; 2378 } 2379 2380 return argumentInfo; 2381 } 2382 source(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2383 void OutputASM::source(sw::Shader::SourceParameter ¶meter, TIntermNode *argument, int index) 2384 { 2385 if(argument) 2386 { 2387 TIntermTyped *arg = argument->getAsTyped(); 2388 Temporary unpackedUniform(this); 2389 2390 const TType& srcType = arg->getType(); 2391 TInterfaceBlock* srcBlock = srcType.getInterfaceBlock(); 2392 if(srcBlock && (srcType.getQualifier() == EvqUniform)) 2393 { 2394 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2395 const TType &memberType = argumentInfo.typedMemberInfo.type; 2396 2397 if(memberType.getBasicType() == EbtBool) 2398 { 2399 ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize 2400 2401 // Convert the packed bool, which is currently an int, to a true bool 2402 Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B); 2403 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2404 instruction->dst.index = registerIndex(&unpackedUniform); 2405 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2406 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2407 instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride; 2408 2409 shader->append(instruction); 2410 2411 arg = &unpackedUniform; 2412 index = 0; 2413 } 2414 else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix()) 2415 { 2416 int numCols = memberType.getNominalSize(); 2417 int numRows = memberType.getSecondarySize(); 2418 2419 ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize 2420 2421 unsigned int dstIndex = registerIndex(&unpackedUniform); 2422 unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55; 2423 int arrayIndex = argumentInfo.clampedIndex / numCols; 2424 int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride; 2425 2426 for(int j = 0; j < numRows; ++j) 2427 { 2428 // Transpose the row major matrix 2429 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV); 2430 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2431 instruction->dst.index = dstIndex; 2432 instruction->dst.mask = 1 << j; 2433 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2434 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2435 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride; 2436 instruction->src[0].swizzle = srcSwizzle; 2437 2438 shader->append(instruction); 2439 } 2440 2441 arg = &unpackedUniform; 2442 index = 0; 2443 } 2444 } 2445 2446 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2447 const TType &type = argumentInfo.typedMemberInfo.type; 2448 2449 int size = registerSize(type, argumentInfo.clampedIndex); 2450 2451 parameter.type = registerType(arg); 2452 parameter.bufferIndex = argumentInfo.bufferIndex; 2453 2454 if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer()) 2455 { 2456 int component = componentCount(type, argumentInfo.clampedIndex); 2457 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer(); 2458 2459 for(int i = 0; i < 4; i++) 2460 { 2461 if(size == 1) // Replicate 2462 { 2463 parameter.value[i] = constants[component + 0].getAsFloat(); 2464 } 2465 else if(i < size) 2466 { 2467 parameter.value[i] = constants[component + i].getAsFloat(); 2468 } 2469 else 2470 { 2471 parameter.value[i] = 0.0f; 2472 } 2473 } 2474 } 2475 else 2476 { 2477 parameter.index = registerIndex(arg) + argumentInfo.clampedIndex; 2478 2479 if(parameter.bufferIndex != -1) 2480 { 2481 int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride; 2482 parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride; 2483 } 2484 2485 if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS) 2486 { 2487 mContext.error(arg->getLine(), 2488 "Too many temporary registers required to compile shader", 2489 pixelShader ? "pixel shader" : "vertex shader"); 2490 } 2491 } 2492 2493 if(!IsSampler(arg->getBasicType())) 2494 { 2495 parameter.swizzle = readSwizzle(arg, size); 2496 } 2497 } 2498 } 2499 destination(sw::Shader::DestinationParameter & parameter,TIntermTyped * arg,int index)2500 void OutputASM::destination(sw::Shader::DestinationParameter ¶meter, TIntermTyped *arg, int index) 2501 { 2502 parameter.type = registerType(arg); 2503 parameter.index = registerIndex(arg) + index; 2504 parameter.mask = writeMask(arg, index); 2505 2506 if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS) 2507 { 2508 mContext.error(arg->getLine(), 2509 "Too many temporary registers required to compile shader", 2510 pixelShader ? "pixel shader" : "vertex shader"); 2511 } 2512 2513 } 2514 copy(TIntermTyped * dst,TIntermNode * src,int offset)2515 void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset) 2516 { 2517 for(int index = 0; index < dst->totalRegisterCount(); index++) 2518 { 2519 emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index); 2520 } 2521 } 2522 swizzleElement(int swizzle,int index)2523 int swizzleElement(int swizzle, int index) 2524 { 2525 return (swizzle >> (index * 2)) & 0x03; 2526 } 2527 swizzleSwizzle(int leftSwizzle,int rightSwizzle)2528 int swizzleSwizzle(int leftSwizzle, int rightSwizzle) 2529 { 2530 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) | 2531 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) | 2532 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) | 2533 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6); 2534 } 2535 assignLvalue(TIntermTyped * dst,TIntermTyped * src)2536 void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src) 2537 { 2538 if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) || 2539 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))) 2540 { 2541 return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix"); 2542 } 2543 2544 TIntermBinary *binary = dst->getAsBinaryNode(); 2545 2546 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar()) 2547 { 2548 Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT); 2549 2550 lvalue(insert->dst, dst); 2551 2552 insert->src[0].type = insert->dst.type; 2553 insert->src[0].index = insert->dst.index; 2554 insert->src[0].rel = insert->dst.rel; 2555 source(insert->src[1], src); 2556 source(insert->src[2], binary->getRight()); 2557 2558 shader->append(insert); 2559 } 2560 else 2561 { 2562 Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV); 2563 2564 int swizzle = lvalue(mov1->dst, dst); 2565 2566 source(mov1->src[0], src); 2567 mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle); 2568 2569 shader->append(mov1); 2570 2571 for(int offset = 1; offset < dst->totalRegisterCount(); offset++) 2572 { 2573 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV); 2574 2575 mov->dst = mov1->dst; 2576 mov->dst.index += offset; 2577 mov->dst.mask = writeMask(dst, offset); 2578 2579 source(mov->src[0], src, offset); 2580 2581 shader->append(mov); 2582 } 2583 } 2584 } 2585 evaluateRvalue(TIntermTyped * node)2586 void OutputASM::evaluateRvalue(TIntermTyped *node) 2587 { 2588 TIntermBinary *binary = node->getAsBinaryNode(); 2589 2590 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar()) 2591 { 2592 Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT); 2593 2594 destination(insert->dst, node); 2595 2596 Temporary address(this); 2597 unsigned char mask; 2598 TIntermTyped *root = nullptr; 2599 unsigned int offset = 0; 2600 int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node); 2601 2602 source(insert->src[0], root, offset); 2603 insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle); 2604 2605 source(insert->src[1], binary->getRight()); 2606 2607 shader->append(insert); 2608 } 2609 else 2610 { 2611 Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV); 2612 2613 destination(mov1->dst, node, 0); 2614 2615 Temporary address(this); 2616 unsigned char mask; 2617 TIntermTyped *root = nullptr; 2618 unsigned int offset = 0; 2619 int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node); 2620 2621 source(mov1->src[0], root, offset); 2622 mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle); 2623 2624 shader->append(mov1); 2625 2626 for(int i = 1; i < node->totalRegisterCount(); i++) 2627 { 2628 Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i); 2629 mov->src[0].rel = mov1->src[0].rel; 2630 } 2631 } 2632 } 2633 lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2634 int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node) 2635 { 2636 Temporary address(this); 2637 TIntermTyped *root = nullptr; 2638 unsigned int offset = 0; 2639 unsigned char mask = 0xF; 2640 int swizzle = lvalue(root, offset, dst.rel, mask, address, node); 2641 2642 dst.type = registerType(root); 2643 dst.index = registerIndex(root) + offset; 2644 dst.mask = mask; 2645 2646 return swizzle; 2647 } 2648 lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2649 int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node) 2650 { 2651 TIntermTyped *result = node; 2652 TIntermBinary *binary = node->getAsBinaryNode(); 2653 TIntermSymbol *symbol = node->getAsSymbolNode(); 2654 2655 if(binary) 2656 { 2657 TIntermTyped *left = binary->getLeft(); 2658 TIntermTyped *right = binary->getRight(); 2659 2660 int leftSwizzle = lvalue(root, offset, rel, mask, address, left); // Resolve the l-value of the left side 2661 2662 switch(binary->getOp()) 2663 { 2664 case EOpIndexDirect: 2665 { 2666 int rightIndex = right->getAsConstantUnion()->getIConst(0); 2667 2668 if(left->isRegister()) 2669 { 2670 int leftMask = mask; 2671 2672 mask = 1; 2673 while((leftMask & mask) == 0) 2674 { 2675 mask = mask << 1; 2676 } 2677 2678 int element = swizzleElement(leftSwizzle, rightIndex); 2679 mask = 1 << element; 2680 2681 return element; 2682 } 2683 else if(left->isArray() || left->isMatrix()) 2684 { 2685 offset += rightIndex * result->totalRegisterCount(); 2686 return 0xE4; 2687 } 2688 else UNREACHABLE(0); 2689 } 2690 break; 2691 case EOpIndexIndirect: 2692 { 2693 right->traverse(this); 2694 2695 if(left->isRegister()) 2696 { 2697 // Requires INSERT instruction (handled by calling function) 2698 } 2699 else if(left->isArray() || left->isMatrix()) 2700 { 2701 int scale = result->totalRegisterCount(); 2702 2703 if(rel.type == sw::Shader::PARAMETER_VOID) // Use the index register as the relative address directly 2704 { 2705 if(left->totalRegisterCount() > 1) 2706 { 2707 sw::Shader::SourceParameter relativeRegister; 2708 source(relativeRegister, right); 2709 2710 int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0; 2711 2712 rel.index = relativeRegister.index; 2713 rel.type = relativeRegister.type; 2714 rel.scale = scale; 2715 rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0); 2716 } 2717 } 2718 else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register 2719 { 2720 if(scale == 1) 2721 { 2722 Constant oldScale((int)rel.scale); 2723 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right); 2724 mad->src[0].index = rel.index; 2725 mad->src[0].type = rel.type; 2726 } 2727 else 2728 { 2729 Constant oldScale((int)rel.scale); 2730 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale); 2731 mul->src[0].index = rel.index; 2732 mul->src[0].type = rel.type; 2733 2734 Constant newScale(scale); 2735 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2736 } 2737 2738 rel.type = sw::Shader::PARAMETER_TEMP; 2739 rel.index = registerIndex(&address); 2740 rel.scale = 1; 2741 } 2742 else // Just add the new index to the address register 2743 { 2744 if(scale == 1) 2745 { 2746 emit(sw::Shader::OPCODE_IADD, &address, &address, right); 2747 } 2748 else 2749 { 2750 Constant newScale(scale); 2751 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2752 } 2753 } 2754 } 2755 else UNREACHABLE(0); 2756 } 2757 break; 2758 case EOpIndexDirectStruct: 2759 case EOpIndexDirectInterfaceBlock: 2760 { 2761 const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ? 2762 left->getType().getStruct()->fields() : 2763 left->getType().getInterfaceBlock()->fields(); 2764 int index = right->getAsConstantUnion()->getIConst(0); 2765 int fieldOffset = 0; 2766 2767 for(int i = 0; i < index; i++) 2768 { 2769 fieldOffset += fields[i]->type()->totalRegisterCount(); 2770 } 2771 2772 offset += fieldOffset; 2773 mask = writeMask(result); 2774 2775 return 0xE4; 2776 } 2777 break; 2778 case EOpVectorSwizzle: 2779 { 2780 ASSERT(left->isRegister()); 2781 2782 int leftMask = mask; 2783 2784 int swizzle = 0; 2785 int rightMask = 0; 2786 2787 TIntermSequence &sequence = right->getAsAggregate()->getSequence(); 2788 2789 for(unsigned int i = 0; i < sequence.size(); i++) 2790 { 2791 int index = sequence[i]->getAsConstantUnion()->getIConst(0); 2792 2793 int element = swizzleElement(leftSwizzle, index); 2794 rightMask = rightMask | (1 << element); 2795 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2); 2796 } 2797 2798 mask = leftMask & rightMask; 2799 2800 return swizzle; 2801 } 2802 break; 2803 default: 2804 UNREACHABLE(binary->getOp()); // Not an l-value operator 2805 break; 2806 } 2807 } 2808 else if(symbol) 2809 { 2810 root = symbol; 2811 offset = 0; 2812 mask = writeMask(symbol); 2813 2814 return 0xE4; 2815 } 2816 else 2817 { 2818 node->traverse(this); 2819 2820 root = node; 2821 offset = 0; 2822 mask = writeMask(node); 2823 2824 return 0xE4; 2825 } 2826 2827 return 0xE4; 2828 } 2829 registerType(TIntermTyped * operand)2830 sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand) 2831 { 2832 if(isSamplerRegister(operand)) 2833 { 2834 return sw::Shader::PARAMETER_SAMPLER; 2835 } 2836 2837 const TQualifier qualifier = operand->getQualifier(); 2838 if((qualifier == EvqFragColor) || (qualifier == EvqFragData)) 2839 { 2840 if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) || 2841 ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData))) 2842 { 2843 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", ""); 2844 } 2845 outputQualifier = qualifier; 2846 } 2847 2848 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer())) 2849 { 2850 // Constant arrays are in the constant register file. 2851 if(operand->isArray() && operand->getArraySize() > 1) 2852 { 2853 return sw::Shader::PARAMETER_CONST; 2854 } 2855 else 2856 { 2857 return sw::Shader::PARAMETER_TEMP; 2858 } 2859 } 2860 2861 switch(qualifier) 2862 { 2863 case EvqTemporary: return sw::Shader::PARAMETER_TEMP; 2864 case EvqGlobal: return sw::Shader::PARAMETER_TEMP; 2865 case EvqConstExpr: return sw::Shader::PARAMETER_FLOAT4LITERAL; // All converted to float 2866 case EvqAttribute: return sw::Shader::PARAMETER_INPUT; 2867 case EvqVaryingIn: return sw::Shader::PARAMETER_INPUT; 2868 case EvqVaryingOut: return sw::Shader::PARAMETER_OUTPUT; 2869 case EvqVertexIn: return sw::Shader::PARAMETER_INPUT; 2870 case EvqFragmentOut: return sw::Shader::PARAMETER_COLOROUT; 2871 case EvqVertexOut: return sw::Shader::PARAMETER_OUTPUT; 2872 case EvqFragmentIn: return sw::Shader::PARAMETER_INPUT; 2873 case EvqInvariantVaryingIn: return sw::Shader::PARAMETER_INPUT; // FIXME: Guarantee invariance at the backend 2874 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT; // FIXME: Guarantee invariance at the backend 2875 case EvqSmooth: return sw::Shader::PARAMETER_OUTPUT; 2876 case EvqFlat: return sw::Shader::PARAMETER_OUTPUT; 2877 case EvqCentroidOut: return sw::Shader::PARAMETER_OUTPUT; 2878 case EvqSmoothIn: return sw::Shader::PARAMETER_INPUT; 2879 case EvqFlatIn: return sw::Shader::PARAMETER_INPUT; 2880 case EvqCentroidIn: return sw::Shader::PARAMETER_INPUT; 2881 case EvqUniform: return sw::Shader::PARAMETER_CONST; 2882 case EvqIn: return sw::Shader::PARAMETER_TEMP; 2883 case EvqOut: return sw::Shader::PARAMETER_TEMP; 2884 case EvqInOut: return sw::Shader::PARAMETER_TEMP; 2885 case EvqConstReadOnly: return sw::Shader::PARAMETER_TEMP; 2886 case EvqPosition: return sw::Shader::PARAMETER_OUTPUT; 2887 case EvqPointSize: return sw::Shader::PARAMETER_OUTPUT; 2888 case EvqInstanceID: return sw::Shader::PARAMETER_MISCTYPE; 2889 case EvqVertexID: return sw::Shader::PARAMETER_MISCTYPE; 2890 case EvqFragCoord: return sw::Shader::PARAMETER_MISCTYPE; 2891 case EvqFrontFacing: return sw::Shader::PARAMETER_MISCTYPE; 2892 case EvqPointCoord: return sw::Shader::PARAMETER_INPUT; 2893 case EvqFragColor: return sw::Shader::PARAMETER_COLOROUT; 2894 case EvqFragData: return sw::Shader::PARAMETER_COLOROUT; 2895 case EvqFragDepth: return sw::Shader::PARAMETER_DEPTHOUT; 2896 default: UNREACHABLE(qualifier); 2897 } 2898 2899 return sw::Shader::PARAMETER_VOID; 2900 } 2901 hasFlatQualifier(TIntermTyped * operand)2902 bool OutputASM::hasFlatQualifier(TIntermTyped *operand) 2903 { 2904 const TQualifier qualifier = operand->getQualifier(); 2905 return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn; 2906 } 2907 registerIndex(TIntermTyped * operand)2908 unsigned int OutputASM::registerIndex(TIntermTyped *operand) 2909 { 2910 if(isSamplerRegister(operand)) 2911 { 2912 return samplerRegister(operand); 2913 } 2914 else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler 2915 { 2916 samplerRegister(operand); // Make sure the sampler is declared 2917 } 2918 2919 const TQualifier qualifier = operand->getQualifier(); 2920 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer())) 2921 { 2922 // Constant arrays are in the constant register file. 2923 if(operand->isArray() && operand->getArraySize() > 1) 2924 { 2925 return uniformRegister(operand); 2926 } 2927 else 2928 { 2929 return temporaryRegister(operand); 2930 } 2931 } 2932 2933 switch(operand->getQualifier()) 2934 { 2935 case EvqTemporary: return temporaryRegister(operand); 2936 case EvqGlobal: return temporaryRegister(operand); 2937 case EvqConstExpr: return temporaryRegister(operand); // Unevaluated constant expression 2938 case EvqAttribute: return attributeRegister(operand); 2939 case EvqVaryingIn: return varyingRegister(operand); 2940 case EvqVaryingOut: return varyingRegister(operand); 2941 case EvqVertexIn: return attributeRegister(operand); 2942 case EvqFragmentOut: return fragmentOutputRegister(operand); 2943 case EvqVertexOut: return varyingRegister(operand); 2944 case EvqFragmentIn: return varyingRegister(operand); 2945 case EvqInvariantVaryingIn: return varyingRegister(operand); 2946 case EvqInvariantVaryingOut: return varyingRegister(operand); 2947 case EvqSmooth: return varyingRegister(operand); 2948 case EvqFlat: return varyingRegister(operand); 2949 case EvqCentroidOut: return varyingRegister(operand); 2950 case EvqSmoothIn: return varyingRegister(operand); 2951 case EvqFlatIn: return varyingRegister(operand); 2952 case EvqCentroidIn: return varyingRegister(operand); 2953 case EvqUniform: return uniformRegister(operand); 2954 case EvqIn: return temporaryRegister(operand); 2955 case EvqOut: return temporaryRegister(operand); 2956 case EvqInOut: return temporaryRegister(operand); 2957 case EvqConstReadOnly: return temporaryRegister(operand); 2958 case EvqPosition: return varyingRegister(operand); 2959 case EvqPointSize: return varyingRegister(operand); 2960 case EvqInstanceID: vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex; 2961 case EvqVertexID: vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex; 2962 case EvqFragCoord: pixelShader->declareVPos(); return sw::Shader::VPosIndex; 2963 case EvqFrontFacing: pixelShader->declareVFace(); return sw::Shader::VFaceIndex; 2964 case EvqPointCoord: return varyingRegister(operand); 2965 case EvqFragColor: return 0; 2966 case EvqFragData: return fragmentOutputRegister(operand); 2967 case EvqFragDepth: return 0; 2968 default: UNREACHABLE(operand->getQualifier()); 2969 } 2970 2971 return 0; 2972 } 2973 writeMask(TIntermTyped * destination,int index)2974 int OutputASM::writeMask(TIntermTyped *destination, int index) 2975 { 2976 if(destination->getQualifier() == EvqPointSize) 2977 { 2978 return 0x2; // Point size stored in the y component 2979 } 2980 2981 return 0xF >> (4 - registerSize(destination->getType(), index)); 2982 } 2983 readSwizzle(TIntermTyped * argument,int size)2984 int OutputASM::readSwizzle(TIntermTyped *argument, int size) 2985 { 2986 if(argument->getQualifier() == EvqPointSize) 2987 { 2988 return 0x55; // Point size stored in the y component 2989 } 2990 2991 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4}; // (void), xxxx, xyyy, xyzz, xyzw 2992 2993 return swizzleSize[size]; 2994 } 2995 2996 // Conservatively checks whether an expression is fast to compute and has no side effects trivial(TIntermTyped * expression,int budget)2997 bool OutputASM::trivial(TIntermTyped *expression, int budget) 2998 { 2999 if(!expression->isRegister()) 3000 { 3001 return false; 3002 } 3003 3004 return cost(expression, budget) >= 0; 3005 } 3006 3007 // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects) cost(TIntermNode * expression,int budget)3008 int OutputASM::cost(TIntermNode *expression, int budget) 3009 { 3010 if(budget < 0) 3011 { 3012 return budget; 3013 } 3014 3015 if(expression->getAsSymbolNode()) 3016 { 3017 return budget; 3018 } 3019 else if(expression->getAsConstantUnion()) 3020 { 3021 return budget; 3022 } 3023 else if(expression->getAsBinaryNode()) 3024 { 3025 TIntermBinary *binary = expression->getAsBinaryNode(); 3026 3027 switch(binary->getOp()) 3028 { 3029 case EOpVectorSwizzle: 3030 case EOpIndexDirect: 3031 case EOpIndexDirectStruct: 3032 case EOpIndexDirectInterfaceBlock: 3033 return cost(binary->getLeft(), budget - 0); 3034 case EOpAdd: 3035 case EOpSub: 3036 case EOpMul: 3037 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1)); 3038 default: 3039 return -1; 3040 } 3041 } 3042 else if(expression->getAsUnaryNode()) 3043 { 3044 TIntermUnary *unary = expression->getAsUnaryNode(); 3045 3046 switch(unary->getOp()) 3047 { 3048 case EOpAbs: 3049 case EOpNegative: 3050 return cost(unary->getOperand(), budget - 1); 3051 default: 3052 return -1; 3053 } 3054 } 3055 else if(expression->getAsSelectionNode()) 3056 { 3057 TIntermSelection *selection = expression->getAsSelectionNode(); 3058 3059 if(selection->usesTernaryOperator()) 3060 { 3061 TIntermTyped *condition = selection->getCondition(); 3062 TIntermNode *trueBlock = selection->getTrueBlock(); 3063 TIntermNode *falseBlock = selection->getFalseBlock(); 3064 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 3065 3066 if(constantCondition) 3067 { 3068 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 3069 3070 if(trueCondition) 3071 { 3072 return cost(trueBlock, budget - 0); 3073 } 3074 else 3075 { 3076 return cost(falseBlock, budget - 0); 3077 } 3078 } 3079 else 3080 { 3081 return cost(trueBlock, cost(falseBlock, budget - 2)); 3082 } 3083 } 3084 } 3085 3086 return -1; 3087 } 3088 findFunction(const TString & name)3089 const Function *OutputASM::findFunction(const TString &name) 3090 { 3091 for(unsigned int f = 0; f < functionArray.size(); f++) 3092 { 3093 if(functionArray[f].name == name) 3094 { 3095 return &functionArray[f]; 3096 } 3097 } 3098 3099 return 0; 3100 } 3101 temporaryRegister(TIntermTyped * temporary)3102 int OutputASM::temporaryRegister(TIntermTyped *temporary) 3103 { 3104 int index = allocate(temporaries, temporary); 3105 if(index >= sw::NUM_TEMPORARY_REGISTERS) 3106 { 3107 mContext.error(temporary->getLine(), 3108 "Too many temporary registers required to compile shader", 3109 pixelShader ? "pixel shader" : "vertex shader"); 3110 } 3111 return index; 3112 } 3113 setPixelShaderInputs(const TType & type,int var,bool flat)3114 void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat) 3115 { 3116 if(type.isStruct()) 3117 { 3118 const TFieldList &fields = type.getStruct()->fields(); 3119 int fieldVar = var; 3120 for(const auto &field : fields) 3121 { 3122 const TType& fieldType = *(field->type()); 3123 setPixelShaderInputs(fieldType, fieldVar, flat); 3124 fieldVar += fieldType.totalRegisterCount(); 3125 } 3126 } 3127 else 3128 { 3129 for(int i = 0; i < type.totalRegisterCount(); i++) 3130 { 3131 pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat)); 3132 } 3133 } 3134 } 3135 varyingRegister(TIntermTyped * varying)3136 int OutputASM::varyingRegister(TIntermTyped *varying) 3137 { 3138 int var = lookup(varyings, varying); 3139 3140 if(var == -1) 3141 { 3142 var = allocate(varyings, varying); 3143 if (var == -1) 3144 { 3145 return 0; 3146 } 3147 int registerCount = varying->totalRegisterCount(); 3148 3149 if(pixelShader) 3150 { 3151 if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS) 3152 { 3153 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader"); 3154 return 0; 3155 } 3156 3157 if(varying->getQualifier() == EvqPointCoord) 3158 { 3159 ASSERT(varying->isRegister()); 3160 pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var)); 3161 } 3162 else 3163 { 3164 setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying)); 3165 } 3166 } 3167 else if(vertexShader) 3168 { 3169 if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS) 3170 { 3171 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader"); 3172 return 0; 3173 } 3174 3175 if(varying->getQualifier() == EvqPosition) 3176 { 3177 ASSERT(varying->isRegister()); 3178 vertexShader->setPositionRegister(var); 3179 } 3180 else if(varying->getQualifier() == EvqPointSize) 3181 { 3182 ASSERT(varying->isRegister()); 3183 vertexShader->setPointSizeRegister(var); 3184 } 3185 else 3186 { 3187 // Semantic indexes for user varyings will be assigned during program link to match the pixel shader 3188 } 3189 } 3190 else UNREACHABLE(0); 3191 3192 declareVarying(varying, var); 3193 } 3194 3195 return var; 3196 } 3197 declareVarying(TIntermTyped * varying,int reg)3198 void OutputASM::declareVarying(TIntermTyped *varying, int reg) 3199 { 3200 if(varying->getQualifier() != EvqPointCoord) // gl_PointCoord does not need linking 3201 { 3202 TIntermSymbol *symbol = varying->getAsSymbolNode(); 3203 declareVarying(varying->getType(), symbol->getSymbol(), reg); 3204 } 3205 } 3206 declareVarying(const TType & type,const TString & varyingName,int registerIndex)3207 void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex) 3208 { 3209 const char *name = varyingName.c_str(); 3210 VaryingList &activeVaryings = shaderObject->varyings; 3211 3212 TStructure* structure = type.getStruct(); 3213 if(structure) 3214 { 3215 int fieldRegisterIndex = registerIndex; 3216 3217 const TFieldList &fields = type.getStruct()->fields(); 3218 for(const auto &field : fields) 3219 { 3220 const TType& fieldType = *(field->type()); 3221 declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex); 3222 if(fieldRegisterIndex >= 0) 3223 { 3224 fieldRegisterIndex += fieldType.totalRegisterCount(); 3225 } 3226 } 3227 } 3228 else 3229 { 3230 // Check if this varying has been declared before without having a register assigned 3231 for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++) 3232 { 3233 if(v->name == name) 3234 { 3235 if(registerIndex >= 0) 3236 { 3237 ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex); 3238 v->registerIndex = registerIndex; 3239 } 3240 3241 return; 3242 } 3243 } 3244 3245 activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0)); 3246 } 3247 } 3248 declareFragmentOutput(TIntermTyped * fragmentOutput)3249 void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput) 3250 { 3251 int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location; 3252 int registerCount = fragmentOutput->totalRegisterCount(); 3253 if(requestedLocation < 0) 3254 { 3255 ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier 3256 return; // No requested location 3257 } 3258 else if((requestedLocation + registerCount) > sw::RENDERTARGETS) 3259 { 3260 mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader"); 3261 } 3262 else 3263 { 3264 int currentIndex = lookup(fragmentOutputs, fragmentOutput); 3265 if(requestedLocation != currentIndex) 3266 { 3267 if(currentIndex != -1) 3268 { 3269 mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader"); 3270 } 3271 else 3272 { 3273 if(fragmentOutputs.size() <= (size_t)requestedLocation) 3274 { 3275 while(fragmentOutputs.size() < (size_t)requestedLocation) 3276 { 3277 fragmentOutputs.push_back(nullptr); 3278 } 3279 for(int i = 0; i < registerCount; i++) 3280 { 3281 fragmentOutputs.push_back(fragmentOutput); 3282 } 3283 } 3284 else 3285 { 3286 for(int i = 0; i < registerCount; i++) 3287 { 3288 if(!fragmentOutputs[requestedLocation + i]) 3289 { 3290 fragmentOutputs[requestedLocation + i] = fragmentOutput; 3291 } 3292 else 3293 { 3294 mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader"); 3295 return; 3296 } 3297 } 3298 } 3299 } 3300 } 3301 } 3302 } 3303 uniformRegister(TIntermTyped * uniform)3304 int OutputASM::uniformRegister(TIntermTyped *uniform) 3305 { 3306 const TType &type = uniform->getType(); 3307 ASSERT(!IsSampler(type.getBasicType())); 3308 TInterfaceBlock *block = type.getAsInterfaceBlock(); 3309 TIntermSymbol *symbol = uniform->getAsSymbolNode(); 3310 ASSERT(symbol || block); 3311 3312 if(symbol || block) 3313 { 3314 TInterfaceBlock* parentBlock = type.getInterfaceBlock(); 3315 bool isBlockMember = (!block && parentBlock); 3316 int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform); 3317 3318 if(index == -1 || isBlockMember) 3319 { 3320 if(index == -1) 3321 { 3322 index = allocate(uniforms, uniform); 3323 if (index == -1) 3324 { 3325 return 0; 3326 } 3327 } 3328 3329 // Verify if the current uniform is a member of an already declared block 3330 const TString &name = symbol ? symbol->getSymbol() : block->name(); 3331 int blockMemberIndex = blockMemberLookup(type, name, index); 3332 if(blockMemberIndex == -1) 3333 { 3334 declareUniform(type, name, index, false); 3335 } 3336 else 3337 { 3338 index = blockMemberIndex; 3339 } 3340 } 3341 3342 return index; 3343 } 3344 3345 return 0; 3346 } 3347 attributeRegister(TIntermTyped * attribute)3348 int OutputASM::attributeRegister(TIntermTyped *attribute) 3349 { 3350 ASSERT(!attribute->isArray()); 3351 3352 int index = lookup(attributes, attribute); 3353 3354 if(index == -1) 3355 { 3356 TIntermSymbol *symbol = attribute->getAsSymbolNode(); 3357 ASSERT(symbol); 3358 3359 if(symbol) 3360 { 3361 index = allocate(attributes, attribute); 3362 if (index == -1) 3363 { 3364 return -1; 3365 } 3366 const TType &type = attribute->getType(); 3367 int registerCount = attribute->totalRegisterCount(); 3368 sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT; 3369 switch(type.getBasicType()) 3370 { 3371 case EbtInt: 3372 attribType = sw::VertexShader::ATTRIBTYPE_INT; 3373 break; 3374 case EbtUInt: 3375 attribType = sw::VertexShader::ATTRIBTYPE_UINT; 3376 break; 3377 case EbtFloat: 3378 default: 3379 break; 3380 } 3381 3382 if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS) 3383 { 3384 for(int i = 0; i < registerCount; i++) 3385 { 3386 vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType); 3387 } 3388 } 3389 3390 ActiveAttributes &activeAttributes = shaderObject->activeAttributes; 3391 3392 const char *name = symbol->getSymbol().c_str(); 3393 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index)); 3394 } 3395 } 3396 3397 return index; 3398 } 3399 fragmentOutputRegister(TIntermTyped * fragmentOutput)3400 int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput) 3401 { 3402 return allocate(fragmentOutputs, fragmentOutput); 3403 } 3404 samplerRegister(TIntermTyped * sampler)3405 int OutputASM::samplerRegister(TIntermTyped *sampler) 3406 { 3407 const TType &type = sampler->getType(); 3408 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3409 3410 TIntermSymbol *symbol = sampler->getAsSymbolNode(); 3411 TIntermBinary *binary = sampler->getAsBinaryNode(); 3412 3413 if(symbol) 3414 { 3415 switch(type.getQualifier()) 3416 { 3417 case EvqUniform: 3418 return samplerRegister(symbol); 3419 case EvqIn: 3420 case EvqConstReadOnly: 3421 // Function arguments are not (uniform) sampler registers 3422 return -1; 3423 default: 3424 UNREACHABLE(type.getQualifier()); 3425 } 3426 } 3427 else if(binary) 3428 { 3429 TIntermTyped *left = binary->getLeft(); 3430 TIntermTyped *right = binary->getRight(); 3431 const TType &leftType = left->getType(); 3432 int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0; 3433 int offset = 0; 3434 3435 switch(binary->getOp()) 3436 { 3437 case EOpIndexDirect: 3438 ASSERT(left->isArray()); 3439 offset = index * leftType.samplerRegisterCount(); 3440 break; 3441 case EOpIndexDirectStruct: 3442 ASSERT(leftType.isStruct()); 3443 { 3444 const TFieldList &fields = leftType.getStruct()->fields(); 3445 3446 for(int i = 0; i < index; i++) 3447 { 3448 offset += fields[i]->type()->totalSamplerRegisterCount(); 3449 } 3450 } 3451 break; 3452 case EOpIndexIndirect: // Indirect indexing produces a temporary, not a sampler register 3453 return -1; 3454 case EOpIndexDirectInterfaceBlock: // Interface blocks can't contain samplers 3455 default: 3456 UNREACHABLE(binary->getOp()); 3457 return -1; 3458 } 3459 3460 int base = samplerRegister(left); 3461 3462 if(base < 0) 3463 { 3464 return -1; 3465 } 3466 3467 return base + offset; 3468 } 3469 3470 UNREACHABLE(0); 3471 return -1; // Not a (uniform) sampler register 3472 } 3473 samplerRegister(TIntermSymbol * sampler)3474 int OutputASM::samplerRegister(TIntermSymbol *sampler) 3475 { 3476 const TType &type = sampler->getType(); 3477 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3478 3479 int index = lookup(samplers, sampler); 3480 3481 if(index == -1) 3482 { 3483 index = allocate(samplers, sampler, true); 3484 if (index == -1) 3485 { 3486 return 0; 3487 } 3488 3489 if(sampler->getQualifier() == EvqUniform) 3490 { 3491 const char *name = sampler->getSymbol().c_str(); 3492 declareUniform(type, name, index, true); 3493 } 3494 } 3495 3496 return index; 3497 } 3498 isSamplerRegister(TIntermTyped * operand)3499 bool OutputASM::isSamplerRegister(TIntermTyped *operand) 3500 { 3501 return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0; 3502 } 3503 arrayExceedsLimits(TIntermTyped * operand)3504 bool OutputASM::arrayExceedsLimits(TIntermTyped *operand) 3505 { 3506 const TVariable *maxUniformVectors = nullptr; 3507 TString builtinName = ""; 3508 if (vertexShader) 3509 { 3510 builtinName = "gl_MaxVertexUniformVectors"; 3511 } 3512 else if (pixelShader) 3513 { 3514 builtinName = "gl_MaxFragmentUniformVectors"; 3515 } 3516 maxUniformVectors = static_cast<const TVariable *>(mContext.symbolTable.findBuiltIn(builtinName.c_str(), mContext.getShaderVersion())); 3517 if (operand->getArraySize() > maxUniformVectors->getConstPointer()->getIConst()) 3518 { 3519 std::stringstream extraInfoStream; 3520 extraInfoStream << "Array size (" << operand->getArraySize() << ") " 3521 << "exceeds limit of " << builtinName 3522 << " (" << maxUniformVectors->getConstPointer()->getIConst() << ")"; 3523 std::string errorStr = extraInfoStream.str(); 3524 mContext.error(operand->getLine(), errorStr.c_str(), 3525 operand->getBasicString()); 3526 return true; 3527 } 3528 return false; 3529 } 3530 lookup(VariableArray & list,TIntermTyped * variable)3531 int OutputASM::lookup(VariableArray &list, TIntermTyped *variable) 3532 { 3533 for(unsigned int i = 0; i < list.size(); i++) 3534 { 3535 if(list[i] == variable) 3536 { 3537 return i; // Pointer match 3538 } 3539 } 3540 3541 TIntermSymbol *varSymbol = variable->getAsSymbolNode(); 3542 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock(); 3543 3544 if(varBlock) 3545 { 3546 for(unsigned int i = 0; i < list.size(); i++) 3547 { 3548 if(list[i]) 3549 { 3550 TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock(); 3551 3552 if(listBlock) 3553 { 3554 if(listBlock->name() == varBlock->name()) 3555 { 3556 ASSERT(listBlock->arraySize() == varBlock->arraySize()); 3557 ASSERT(listBlock->fields() == varBlock->fields()); 3558 ASSERT(listBlock->blockStorage() == varBlock->blockStorage()); 3559 ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking()); 3560 3561 return i; 3562 } 3563 } 3564 } 3565 } 3566 } 3567 else if(varSymbol) 3568 { 3569 for(unsigned int i = 0; i < list.size(); i++) 3570 { 3571 if(list[i]) 3572 { 3573 TIntermSymbol *listSymbol = list[i]->getAsSymbolNode(); 3574 3575 if(listSymbol) 3576 { 3577 if(listSymbol->getId() == varSymbol->getId()) 3578 { 3579 ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol()); 3580 ASSERT(listSymbol->getType() == varSymbol->getType()); 3581 ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier()); 3582 3583 return i; 3584 } 3585 } 3586 } 3587 } 3588 } 3589 3590 return -1; 3591 } 3592 lookup(VariableArray & list,TInterfaceBlock * block)3593 int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block) 3594 { 3595 for(unsigned int i = 0; i < list.size(); i++) 3596 { 3597 if(list[i] && (list[i]->getType().getInterfaceBlock() == block)) 3598 { 3599 return i; // Pointer match 3600 } 3601 } 3602 return -1; 3603 } 3604 allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3605 int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly) 3606 { 3607 int index = lookup(list, variable); 3608 3609 if(index == -1) 3610 { 3611 if (arrayExceedsLimits(variable)) 3612 { 3613 return -1; 3614 } 3615 unsigned int registerCount = variable->blockRegisterCount(samplersOnly); 3616 3617 for(unsigned int i = 0; i < list.size(); i++) 3618 { 3619 if(list[i] == 0) 3620 { 3621 unsigned int j = 1; 3622 for( ; j < registerCount && (i + j) < list.size(); j++) 3623 { 3624 if(list[i + j] != 0) 3625 { 3626 break; 3627 } 3628 } 3629 3630 if(j == registerCount) // Found free slots 3631 { 3632 for(unsigned int j = 0; j < registerCount; j++) 3633 { 3634 list[i + j] = variable; 3635 } 3636 3637 return i; 3638 } 3639 } 3640 } 3641 3642 index = list.size(); 3643 3644 for(unsigned int i = 0; i < registerCount; i++) 3645 { 3646 list.push_back(variable); 3647 } 3648 } 3649 3650 return index; 3651 } 3652 free(VariableArray & list,TIntermTyped * variable)3653 void OutputASM::free(VariableArray &list, TIntermTyped *variable) 3654 { 3655 int index = lookup(list, variable); 3656 3657 if(index >= 0) 3658 { 3659 list[index] = 0; 3660 } 3661 } 3662 blockMemberLookup(const TType & type,const TString & name,int registerIndex)3663 int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex) 3664 { 3665 const TInterfaceBlock *block = type.getInterfaceBlock(); 3666 3667 if(block) 3668 { 3669 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3670 const TFieldList& fields = block->fields(); 3671 const TString &blockName = block->name(); 3672 int fieldRegisterIndex = registerIndex; 3673 3674 if(!type.isInterfaceBlock()) 3675 { 3676 // This is a uniform that's part of a block, let's see if the block is already defined 3677 for(size_t i = 0; i < activeUniformBlocks.size(); ++i) 3678 { 3679 if(activeUniformBlocks[i].name == blockName.c_str()) 3680 { 3681 // The block is already defined, find the register for the current uniform and return it 3682 for(size_t j = 0; j < fields.size(); j++) 3683 { 3684 const TString &fieldName = fields[j]->name(); 3685 if(fieldName == name) 3686 { 3687 return fieldRegisterIndex; 3688 } 3689 3690 fieldRegisterIndex += fields[j]->type()->totalRegisterCount(); 3691 } 3692 3693 ASSERT(false); 3694 return fieldRegisterIndex; 3695 } 3696 } 3697 } 3698 } 3699 3700 return -1; 3701 } 3702 declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3703 void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder) 3704 { 3705 const TStructure *structure = type.getStruct(); 3706 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr; 3707 3708 if(!structure && !block) 3709 { 3710 ActiveUniforms &activeUniforms = shaderObject->activeUniforms; 3711 const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo(); 3712 if(blockId >= 0) 3713 { 3714 blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type))); 3715 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size()); 3716 } 3717 int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex; 3718 bool isSampler = IsSampler(type.getBasicType()); 3719 if(isSampler && samplersOnly) 3720 { 3721 for(int i = 0; i < type.totalRegisterCount(); i++) 3722 { 3723 shader->declareSampler(fieldRegisterIndex + i); 3724 } 3725 } 3726 if(isSampler == samplersOnly) 3727 { 3728 activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo)); 3729 } 3730 } 3731 else if(block) 3732 { 3733 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3734 const TFieldList& fields = block->fields(); 3735 const TString &blockName = block->name(); 3736 int fieldRegisterIndex = registerIndex; 3737 bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1); 3738 3739 blockId = activeUniformBlocks.size(); 3740 bool isRowMajor = block->matrixPacking() == EmpRowMajor; 3741 activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(), 3742 block->blockStorage(), isRowMajor, registerIndex, blockId)); 3743 blockDefinitions.push_back(BlockDefinitionIndexMap()); 3744 3745 Std140BlockEncoder currentBlockEncoder; 3746 currentBlockEncoder.enterAggregateType(); 3747 for(const auto &field : fields) 3748 { 3749 const TType &fieldType = *(field->type()); 3750 const TString &fieldName = field->name(); 3751 if(isUniformBlockMember && (fieldName == name)) 3752 { 3753 registerIndex = fieldRegisterIndex; 3754 } 3755 3756 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName; 3757 3758 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, ¤tBlockEncoder); 3759 fieldRegisterIndex += fieldType.totalRegisterCount(); 3760 } 3761 currentBlockEncoder.exitAggregateType(); 3762 activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize(); 3763 } 3764 else 3765 { 3766 // Store struct for program link time validation 3767 shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo())); 3768 3769 int fieldRegisterIndex = registerIndex; 3770 3771 const TFieldList& fields = structure->fields(); 3772 if(type.isArray() && (structure || type.isInterfaceBlock())) 3773 { 3774 for(int i = 0; i < type.getArraySize(); i++) 3775 { 3776 if(encoder) 3777 { 3778 encoder->enterAggregateType(); 3779 } 3780 for(const auto &field : fields) 3781 { 3782 const TType &fieldType = *(field->type()); 3783 const TString &fieldName = field->name(); 3784 const TString uniformName = name + "[" + str(i) + "]." + fieldName; 3785 3786 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder); 3787 fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount(); 3788 } 3789 if(encoder) 3790 { 3791 encoder->exitAggregateType(); 3792 } 3793 } 3794 } 3795 else 3796 { 3797 if(encoder) 3798 { 3799 encoder->enterAggregateType(); 3800 } 3801 for(const auto &field : fields) 3802 { 3803 const TType &fieldType = *(field->type()); 3804 const TString &fieldName = field->name(); 3805 const TString uniformName = name + "." + fieldName; 3806 3807 declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder); 3808 fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount(); 3809 } 3810 if(encoder) 3811 { 3812 encoder->exitAggregateType(); 3813 } 3814 } 3815 } 3816 } 3817 dim(TIntermNode * v)3818 int OutputASM::dim(TIntermNode *v) 3819 { 3820 TIntermTyped *vector = v->getAsTyped(); 3821 ASSERT(vector && vector->isRegister()); 3822 return vector->getNominalSize(); 3823 } 3824 dim2(TIntermNode * m)3825 int OutputASM::dim2(TIntermNode *m) 3826 { 3827 TIntermTyped *matrix = m->getAsTyped(); 3828 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray()); 3829 return matrix->getSecondarySize(); 3830 } 3831 3832 // Sets iterations to ~0u if no loop count could be statically determined. LoopInfo(TIntermLoop * node)3833 OutputASM::LoopInfo::LoopInfo(TIntermLoop *node) 3834 { 3835 // Parse loops of the form: 3836 // for(int index = initial; index [comparator] limit; index [op] increment) 3837 3838 // Parse index name and intial value 3839 if(node->getInit()) 3840 { 3841 TIntermAggregate *init = node->getInit()->getAsAggregate(); 3842 3843 if(init) 3844 { 3845 TIntermSequence &sequence = init->getSequence(); 3846 TIntermTyped *variable = sequence[0]->getAsTyped(); 3847 3848 if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt) 3849 { 3850 TIntermBinary *assign = variable->getAsBinaryNode(); 3851 3852 if(assign && assign->getOp() == EOpInitialize) 3853 { 3854 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode(); 3855 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion(); 3856 3857 if(symbol && constant) 3858 { 3859 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3860 { 3861 index = symbol; 3862 initial = constant->getUnionArrayPointer()[0].getIConst(); 3863 } 3864 } 3865 } 3866 } 3867 } 3868 } 3869 3870 // Parse comparator and limit value 3871 if(index && node->getCondition()) 3872 { 3873 TIntermBinary *test = node->getCondition()->getAsBinaryNode(); 3874 TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr; 3875 3876 if(left && (left->getId() == index->getId())) 3877 { 3878 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion(); 3879 3880 if(constant) 3881 { 3882 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3883 { 3884 comparator = test->getOp(); 3885 limit = constant->getUnionArrayPointer()[0].getIConst(); 3886 } 3887 } 3888 } 3889 } 3890 3891 // Parse increment 3892 if(index && comparator != EOpNull && node->getExpression()) 3893 { 3894 TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode(); 3895 TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode(); 3896 3897 if(binaryTerminal) 3898 { 3899 TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode(); 3900 3901 if(operand && operand->getId() == index->getId()) 3902 { 3903 TOperator op = binaryTerminal->getOp(); 3904 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion(); 3905 3906 if(constant) 3907 { 3908 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3909 { 3910 int value = constant->getUnionArrayPointer()[0].getIConst(); 3911 3912 switch(op) 3913 { 3914 case EOpAddAssign: increment = value; break; 3915 case EOpSubAssign: increment = -value; break; 3916 default: increment = 0; break; // Rare cases left unhandled. Treated as non-deterministic. 3917 } 3918 } 3919 } 3920 } 3921 } 3922 else if(unaryTerminal) 3923 { 3924 TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode(); 3925 3926 if(operand && operand->getId() == index->getId()) 3927 { 3928 TOperator op = unaryTerminal->getOp(); 3929 3930 switch(op) 3931 { 3932 case EOpPostIncrement: increment = 1; break; 3933 case EOpPostDecrement: increment = -1; break; 3934 case EOpPreIncrement: increment = 1; break; 3935 case EOpPreDecrement: increment = -1; break; 3936 default: increment = 0; break; // Rare cases left unhandled. Treated as non-deterministic. 3937 } 3938 } 3939 } 3940 } 3941 3942 if(index && comparator != EOpNull && increment != 0) 3943 { 3944 // Check the loop body for return statements or changes to the index variable that make it non-deterministic. 3945 LoopUnrollable loopUnrollable; 3946 bool unrollable = loopUnrollable.traverse(node, index->getId()); 3947 3948 if(!unrollable) 3949 { 3950 iterations = ~0u; 3951 return; 3952 } 3953 3954 if(comparator == EOpLessThanEqual) 3955 { 3956 comparator = EOpLessThan; 3957 limit += 1; 3958 } 3959 else if(comparator == EOpGreaterThanEqual) 3960 { 3961 comparator = EOpLessThan; 3962 limit -= 1; 3963 std::swap(initial, limit); 3964 increment = -increment; 3965 } 3966 else if(comparator == EOpGreaterThan) 3967 { 3968 comparator = EOpLessThan; 3969 std::swap(initial, limit); 3970 increment = -increment; 3971 } 3972 3973 if(comparator == EOpLessThan) 3974 { 3975 if(!(initial < limit)) // Never loops 3976 { 3977 iterations = 0; 3978 } 3979 else if(increment < 0) 3980 { 3981 iterations = ~0u; 3982 } 3983 else 3984 { 3985 iterations = (limit - initial + abs(increment) - 1) / increment; // Ceiling division 3986 } 3987 } 3988 else 3989 { 3990 // Rare cases left unhandled. Treated as non-deterministic. 3991 iterations = ~0u; 3992 } 3993 } 3994 } 3995 traverse(TIntermLoop * loop,int indexId)3996 bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId) 3997 { 3998 loopUnrollable = true; 3999 4000 loopIndexId = indexId; 4001 TIntermNode *body = loop->getBody(); 4002 4003 if(body) 4004 { 4005 body->traverse(this); 4006 } 4007 4008 return loopUnrollable; 4009 } 4010 visitSymbol(TIntermSymbol * node)4011 void LoopUnrollable::visitSymbol(TIntermSymbol *node) 4012 { 4013 // Check that the loop index is not used as the argument to a function out or inout parameter. 4014 if(node->getId() == loopIndexId) 4015 { 4016 if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut) 4017 { 4018 loopUnrollable = false; 4019 } 4020 } 4021 } 4022 visitBinary(Visit visit,TIntermBinary * node)4023 bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node) 4024 { 4025 if(!loopUnrollable) 4026 { 4027 return false; 4028 } 4029 4030 // Check that the loop index is not statically assigned to. 4031 TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode(); 4032 loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId)); 4033 4034 return loopUnrollable; 4035 } 4036 visitUnary(Visit visit,TIntermUnary * node)4037 bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node) 4038 { 4039 if(!loopUnrollable) 4040 { 4041 return false; 4042 } 4043 4044 // Check that the loop index is not statically assigned to. 4045 TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode(); 4046 loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId)); 4047 4048 return loopUnrollable; 4049 } 4050 visitBranch(Visit visit,TIntermBranch * node)4051 bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node) 4052 { 4053 if(!loopUnrollable) 4054 { 4055 return false; 4056 } 4057 4058 switch(node->getFlowOp()) 4059 { 4060 case EOpKill: 4061 case EOpReturn: 4062 case EOpBreak: 4063 case EOpContinue: 4064 loopUnrollable = false; 4065 break; 4066 default: UNREACHABLE(node->getFlowOp()); 4067 } 4068 4069 return loopUnrollable; 4070 } 4071 visitAggregate(Visit visit,TIntermAggregate * node)4072 bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node) 4073 { 4074 return loopUnrollable; 4075 } 4076 } 4077