1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "OutputASM.h" 16 #include "Common/Math.hpp" 17 18 #include "common/debug.h" 19 #include "InfoSink.h" 20 21 #include "libGLESv2/Shader.h" 22 23 #include <GLES2/gl2.h> 24 #include <GLES2/gl2ext.h> 25 #include <GLES3/gl3.h> 26 27 #include <stdlib.h> 28 29 namespace glsl 30 { 31 // Integer to TString conversion str(int i)32 TString str(int i) 33 { 34 char buffer[20]; 35 sprintf(buffer, "%d", i); 36 return buffer; 37 } 38 39 class Temporary : public TIntermSymbol 40 { 41 public: Temporary(OutputASM * assembler)42 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler) 43 { 44 } 45 ~Temporary()46 ~Temporary() 47 { 48 assembler->freeTemporary(this); 49 } 50 51 private: 52 OutputASM *const assembler; 53 }; 54 55 class Constant : public TIntermConstantUnion 56 { 57 public: Constant(float x,float y,float z,float w)58 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false)) 59 { 60 constants[0].setFConst(x); 61 constants[1].setFConst(y); 62 constants[2].setFConst(z); 63 constants[3].setFConst(w); 64 } 65 Constant(bool b)66 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false)) 67 { 68 constants[0].setBConst(b); 69 } 70 Constant(int i)71 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false)) 72 { 73 constants[0].setIConst(i); 74 } 75 ~Constant()76 ~Constant() 77 { 78 } 79 80 private: 81 ConstantUnion constants[4]; 82 }; 83 Uniform(GLenum type,GLenum precision,const std::string & name,int arraySize,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)84 Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) : 85 type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo) 86 { 87 } 88 UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)89 UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize, 90 TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) : 91 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout), 92 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId) 93 { 94 } 95 BlockLayoutEncoder(bool rowMajor)96 BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor) 97 : mCurrentOffset(0), isRowMajor(rowMajor) 98 { 99 } 100 encodeType(const TType & type)101 BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type) 102 { 103 int arrayStride; 104 int matrixStride; 105 106 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride); 107 108 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent), 109 static_cast<int>(arrayStride * BytesPerComponent), 110 static_cast<int>(matrixStride * BytesPerComponent), 111 (matrixStride > 0) && isRowMajor); 112 113 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride); 114 115 return memberInfo; 116 } 117 118 // static getBlockRegister(const BlockMemberInfo & info)119 size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info) 120 { 121 return (info.offset / BytesPerComponent) / ComponentsPerRegister; 122 } 123 124 // static getBlockRegisterElement(const BlockMemberInfo & info)125 size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info) 126 { 127 return (info.offset / BytesPerComponent) % ComponentsPerRegister; 128 } 129 nextRegister()130 void BlockLayoutEncoder::nextRegister() 131 { 132 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister); 133 } 134 Std140BlockEncoder(bool rowMajor)135 Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor) 136 { 137 } 138 enterAggregateType()139 void Std140BlockEncoder::enterAggregateType() 140 { 141 nextRegister(); 142 } 143 exitAggregateType()144 void Std140BlockEncoder::exitAggregateType() 145 { 146 nextRegister(); 147 } 148 getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)149 void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut) 150 { 151 size_t baseAlignment = 0; 152 int matrixStride = 0; 153 int arrayStride = 0; 154 155 if(type.isMatrix()) 156 { 157 baseAlignment = ComponentsPerRegister; 158 matrixStride = ComponentsPerRegister; 159 160 if(arraySize > 0) 161 { 162 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 163 arrayStride = ComponentsPerRegister * numRegisters; 164 } 165 } 166 else if(arraySize > 0) 167 { 168 baseAlignment = ComponentsPerRegister; 169 arrayStride = ComponentsPerRegister; 170 } 171 else 172 { 173 const size_t numComponents = type.getElementSize(); 174 baseAlignment = (numComponents == 3 ? 4u : numComponents); 175 } 176 177 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment); 178 179 *matrixStrideOut = matrixStride; 180 *arrayStrideOut = arrayStride; 181 } 182 advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)183 void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride) 184 { 185 if(arraySize > 0) 186 { 187 mCurrentOffset += arrayStride * arraySize; 188 } 189 else if(type.isMatrix()) 190 { 191 ASSERT(matrixStride == ComponentsPerRegister); 192 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize(); 193 mCurrentOffset += ComponentsPerRegister * numRegisters; 194 } 195 else 196 { 197 mCurrentOffset += type.getElementSize(); 198 } 199 } 200 Attribute()201 Attribute::Attribute() 202 { 203 type = GL_NONE; 204 arraySize = 0; 205 registerIndex = 0; 206 } 207 Attribute(GLenum type,const std::string & name,int arraySize,int location,int registerIndex)208 Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex) 209 { 210 this->type = type; 211 this->name = name; 212 this->arraySize = arraySize; 213 this->location = location; 214 this->registerIndex = registerIndex; 215 } 216 getPixelShader() const217 sw::PixelShader *Shader::getPixelShader() const 218 { 219 return 0; 220 } 221 getVertexShader() const222 sw::VertexShader *Shader::getVertexShader() const 223 { 224 return 0; 225 } 226 TextureFunction(const TString & nodeName)227 OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false) 228 { 229 TString name = TFunction::unmangleName(nodeName); 230 231 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D") 232 { 233 method = IMPLICIT; 234 } 235 else if(name == "texture2DProj" || name == "textureProj") 236 { 237 method = IMPLICIT; 238 proj = true; 239 } 240 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod") 241 { 242 method = LOD; 243 } 244 else if(name == "texture2DProjLod" || name == "textureProjLod") 245 { 246 method = LOD; 247 proj = true; 248 } 249 else if(name == "textureSize") 250 { 251 method = SIZE; 252 } 253 else if(name == "textureOffset") 254 { 255 method = IMPLICIT; 256 offset = true; 257 } 258 else if(name == "textureProjOffset") 259 { 260 method = IMPLICIT; 261 offset = true; 262 proj = true; 263 } 264 else if(name == "textureLodOffset") 265 { 266 method = LOD; 267 offset = true; 268 } 269 else if(name == "textureProjLodOffset") 270 { 271 method = LOD; 272 proj = true; 273 offset = true; 274 } 275 else if(name == "texelFetch") 276 { 277 method = FETCH; 278 } 279 else if(name == "texelFetchOffset") 280 { 281 method = FETCH; 282 offset = true; 283 } 284 else if(name == "textureGrad") 285 { 286 method = GRAD; 287 } 288 else if(name == "textureGradOffset") 289 { 290 method = GRAD; 291 offset = true; 292 } 293 else if(name == "textureProjGrad") 294 { 295 method = GRAD; 296 proj = true; 297 } 298 else if(name == "textureProjGradOffset") 299 { 300 method = GRAD; 301 proj = true; 302 offset = true; 303 } 304 else UNREACHABLE(0); 305 } 306 OutputASM(TParseContext & context,Shader * shaderObject)307 OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context) 308 { 309 shader = 0; 310 pixelShader = 0; 311 vertexShader = 0; 312 313 if(shaderObject) 314 { 315 shader = shaderObject->getShader(); 316 pixelShader = shaderObject->getPixelShader(); 317 vertexShader = shaderObject->getVertexShader(); 318 } 319 320 functionArray.push_back(Function(0, "main(", 0, 0)); 321 currentFunction = 0; 322 outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData 323 } 324 ~OutputASM()325 OutputASM::~OutputASM() 326 { 327 } 328 output()329 void OutputASM::output() 330 { 331 if(shader) 332 { 333 emitShader(GLOBAL); 334 335 if(functionArray.size() > 1) // Only call main() when there are other functions 336 { 337 Instruction *callMain = emit(sw::Shader::OPCODE_CALL); 338 callMain->dst.type = sw::Shader::PARAMETER_LABEL; 339 callMain->dst.index = 0; // main() 340 341 emit(sw::Shader::OPCODE_RET); 342 } 343 344 emitShader(FUNCTION); 345 } 346 } 347 emitShader(Scope scope)348 void OutputASM::emitShader(Scope scope) 349 { 350 emitScope = scope; 351 currentScope = GLOBAL; 352 mContext.getTreeRoot()->traverse(this); 353 } 354 freeTemporary(Temporary * temporary)355 void OutputASM::freeTemporary(Temporary *temporary) 356 { 357 free(temporaries, temporary); 358 } 359 getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const360 sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const 361 { 362 TBasicType baseType = in->getType().getBasicType(); 363 364 switch(op) 365 { 366 case sw::Shader::OPCODE_NEG: 367 switch(baseType) 368 { 369 case EbtInt: 370 case EbtUInt: 371 return sw::Shader::OPCODE_INEG; 372 case EbtFloat: 373 default: 374 return op; 375 } 376 case sw::Shader::OPCODE_ABS: 377 switch(baseType) 378 { 379 case EbtInt: 380 return sw::Shader::OPCODE_IABS; 381 case EbtFloat: 382 default: 383 return op; 384 } 385 case sw::Shader::OPCODE_SGN: 386 switch(baseType) 387 { 388 case EbtInt: 389 return sw::Shader::OPCODE_ISGN; 390 case EbtFloat: 391 default: 392 return op; 393 } 394 case sw::Shader::OPCODE_ADD: 395 switch(baseType) 396 { 397 case EbtInt: 398 case EbtUInt: 399 return sw::Shader::OPCODE_IADD; 400 case EbtFloat: 401 default: 402 return op; 403 } 404 case sw::Shader::OPCODE_SUB: 405 switch(baseType) 406 { 407 case EbtInt: 408 case EbtUInt: 409 return sw::Shader::OPCODE_ISUB; 410 case EbtFloat: 411 default: 412 return op; 413 } 414 case sw::Shader::OPCODE_MUL: 415 switch(baseType) 416 { 417 case EbtInt: 418 case EbtUInt: 419 return sw::Shader::OPCODE_IMUL; 420 case EbtFloat: 421 default: 422 return op; 423 } 424 case sw::Shader::OPCODE_DIV: 425 switch(baseType) 426 { 427 case EbtInt: 428 return sw::Shader::OPCODE_IDIV; 429 case EbtUInt: 430 return sw::Shader::OPCODE_UDIV; 431 case EbtFloat: 432 default: 433 return op; 434 } 435 case sw::Shader::OPCODE_IMOD: 436 return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op; 437 case sw::Shader::OPCODE_ISHR: 438 return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op; 439 case sw::Shader::OPCODE_MIN: 440 switch(baseType) 441 { 442 case EbtInt: 443 return sw::Shader::OPCODE_IMIN; 444 case EbtUInt: 445 return sw::Shader::OPCODE_UMIN; 446 case EbtFloat: 447 default: 448 return op; 449 } 450 case sw::Shader::OPCODE_MAX: 451 switch(baseType) 452 { 453 case EbtInt: 454 return sw::Shader::OPCODE_IMAX; 455 case EbtUInt: 456 return sw::Shader::OPCODE_UMAX; 457 case EbtFloat: 458 default: 459 return op; 460 } 461 default: 462 return op; 463 } 464 } 465 visitSymbol(TIntermSymbol * symbol)466 void OutputASM::visitSymbol(TIntermSymbol *symbol) 467 { 468 // Vertex varyings don't have to be actively used to successfully link 469 // against pixel shaders that use them. So make sure they're declared. 470 if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut) 471 { 472 if(symbol->getBasicType() != EbtInvariant) // Typeless declarations are not new varyings 473 { 474 declareVarying(symbol, -1); 475 } 476 } 477 478 TInterfaceBlock* block = symbol->getType().getInterfaceBlock(); 479 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables: 480 // "All members of a named uniform block declared with a shared or std140 layout qualifier 481 // are considered active, even if they are not referenced in any shader in the program. 482 // The uniform block itself is also considered active, even if no member of the block is referenced." 483 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140))) 484 { 485 uniformRegister(symbol); 486 } 487 } 488 visitBinary(Visit visit,TIntermBinary * node)489 bool OutputASM::visitBinary(Visit visit, TIntermBinary *node) 490 { 491 if(currentScope != emitScope) 492 { 493 return false; 494 } 495 496 TIntermTyped *result = node; 497 TIntermTyped *left = node->getLeft(); 498 TIntermTyped *right = node->getRight(); 499 const TType &leftType = left->getType(); 500 const TType &rightType = right->getType(); 501 502 if(isSamplerRegister(result)) 503 { 504 return false; // Don't traverse, the register index is determined statically 505 } 506 507 switch(node->getOp()) 508 { 509 case EOpAssign: 510 if(visit == PostVisit) 511 { 512 assignLvalue(left, right); 513 copy(result, right); 514 } 515 break; 516 case EOpInitialize: 517 if(visit == PostVisit) 518 { 519 copy(left, right); 520 } 521 break; 522 case EOpMatrixTimesScalarAssign: 523 if(visit == PostVisit) 524 { 525 for(int i = 0; i < leftType.getNominalSize(); i++) 526 { 527 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right); 528 } 529 530 assignLvalue(left, result); 531 } 532 break; 533 case EOpVectorTimesMatrixAssign: 534 if(visit == PostVisit) 535 { 536 int size = leftType.getNominalSize(); 537 538 for(int i = 0; i < size; i++) 539 { 540 Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i); 541 dot->dst.mask = 1 << i; 542 } 543 544 assignLvalue(left, result); 545 } 546 break; 547 case EOpMatrixTimesMatrixAssign: 548 if(visit == PostVisit) 549 { 550 int dim = leftType.getNominalSize(); 551 552 for(int i = 0; i < dim; i++) 553 { 554 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 555 mul->src[1].swizzle = 0x00; 556 557 for(int j = 1; j < dim; j++) 558 { 559 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 560 mad->src[1].swizzle = j * 0x55; 561 } 562 } 563 564 assignLvalue(left, result); 565 } 566 break; 567 case EOpIndexDirect: 568 if(visit == PostVisit) 569 { 570 int index = right->getAsConstantUnion()->getIConst(0); 571 572 if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock()) 573 { 574 ASSERT(left->isArray()); 575 copy(result, left, index * left->elementRegisterCount()); 576 } 577 else if(result->isRegister()) 578 { 579 int srcIndex = 0; 580 if(left->isRegister()) 581 { 582 srcIndex = 0; 583 } 584 else if(left->isArray()) 585 { 586 srcIndex = index * left->elementRegisterCount(); 587 } 588 else if(left->isMatrix()) 589 { 590 ASSERT(index < left->getNominalSize()); // FIXME: Report semantic error 591 srcIndex = index; 592 } 593 else UNREACHABLE(0); 594 595 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex); 596 597 if(left->isRegister()) 598 { 599 mov->src[0].swizzle = index; 600 } 601 } 602 else UNREACHABLE(0); 603 } 604 break; 605 case EOpIndexIndirect: 606 if(visit == PostVisit) 607 { 608 if(left->isArray() || left->isMatrix()) 609 { 610 for(int index = 0; index < result->totalRegisterCount(); index++) 611 { 612 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index); 613 mov->dst.mask = writeMask(result, index); 614 615 if(left->totalRegisterCount() > 1) 616 { 617 sw::Shader::SourceParameter relativeRegister; 618 argument(relativeRegister, right); 619 620 mov->src[0].rel.type = relativeRegister.type; 621 mov->src[0].rel.index = relativeRegister.index; 622 mov->src[0].rel.scale = result->totalRegisterCount(); 623 mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform); 624 } 625 } 626 } 627 else if(left->isRegister()) 628 { 629 emit(sw::Shader::OPCODE_EXTRACT, result, left, right); 630 } 631 else UNREACHABLE(0); 632 } 633 break; 634 case EOpIndexDirectStruct: 635 case EOpIndexDirectInterfaceBlock: 636 if(visit == PostVisit) 637 { 638 ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock())); 639 640 const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ? 641 leftType.getStruct()->fields() : 642 leftType.getInterfaceBlock()->fields(); 643 int index = right->getAsConstantUnion()->getIConst(0); 644 int fieldOffset = 0; 645 646 for(int i = 0; i < index; i++) 647 { 648 fieldOffset += fields[i]->type()->totalRegisterCount(); 649 } 650 651 copy(result, left, fieldOffset); 652 } 653 break; 654 case EOpVectorSwizzle: 655 if(visit == PostVisit) 656 { 657 int swizzle = 0; 658 TIntermAggregate *components = right->getAsAggregate(); 659 660 if(components) 661 { 662 TIntermSequence &sequence = components->getSequence(); 663 int component = 0; 664 665 for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++) 666 { 667 TIntermConstantUnion *element = (*sit)->getAsConstantUnion(); 668 669 if(element) 670 { 671 int i = element->getUnionArrayPointer()[0].getIConst(); 672 swizzle |= i << (component * 2); 673 component++; 674 } 675 else UNREACHABLE(0); 676 } 677 } 678 else UNREACHABLE(0); 679 680 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left); 681 mov->src[0].swizzle = swizzle; 682 } 683 break; 684 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break; 685 case EOpAdd: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right); break; 686 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break; 687 case EOpSub: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right); break; 688 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break; 689 case EOpMul: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right); break; 690 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break; 691 case EOpDiv: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right); break; 692 case EOpIModAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break; 693 case EOpIMod: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right); break; 694 case EOpBitShiftLeftAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break; 695 case EOpBitShiftLeft: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right); break; 696 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break; 697 case EOpBitShiftRight: if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right); break; 698 case EOpBitwiseAndAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break; 699 case EOpBitwiseAnd: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right); break; 700 case EOpBitwiseXorAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break; 701 case EOpBitwiseXor: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right); break; 702 case EOpBitwiseOrAssign: if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right); break; 703 case EOpBitwiseOr: if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right); break; 704 case EOpEqual: 705 if(visit == PostVisit) 706 { 707 emitBinary(sw::Shader::OPCODE_EQ, result, left, right); 708 709 for(int index = 1; index < left->totalRegisterCount(); index++) 710 { 711 Temporary equal(this); 712 emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index); 713 emit(sw::Shader::OPCODE_AND, result, result, &equal); 714 } 715 } 716 break; 717 case EOpNotEqual: 718 if(visit == PostVisit) 719 { 720 emitBinary(sw::Shader::OPCODE_NE, result, left, right); 721 722 for(int index = 1; index < left->totalRegisterCount(); index++) 723 { 724 Temporary notEqual(this); 725 emit(sw::Shader::OPCODE_NE, ¬Equal, 0, left, index, right, index); 726 emit(sw::Shader::OPCODE_OR, result, result, ¬Equal); 727 } 728 } 729 break; 730 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break; 731 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break; 732 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break; 733 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break; 734 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break; 735 case EOpVectorTimesScalar: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break; 736 case EOpMatrixTimesScalar: 737 if(visit == PostVisit) 738 { 739 if(left->isMatrix()) 740 { 741 for(int i = 0; i < leftType.getNominalSize(); i++) 742 { 743 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0); 744 } 745 } 746 else if(right->isMatrix()) 747 { 748 for(int i = 0; i < rightType.getNominalSize(); i++) 749 { 750 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 751 } 752 } 753 else UNREACHABLE(0); 754 } 755 break; 756 case EOpVectorTimesMatrix: 757 if(visit == PostVisit) 758 { 759 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize()); 760 761 int size = rightType.getNominalSize(); 762 for(int i = 0; i < size; i++) 763 { 764 Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i); 765 dot->dst.mask = 1 << i; 766 } 767 } 768 break; 769 case EOpMatrixTimesVector: 770 if(visit == PostVisit) 771 { 772 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right); 773 mul->src[1].swizzle = 0x00; 774 775 int size = rightType.getNominalSize(); 776 for(int i = 1; i < size; i++) 777 { 778 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result); 779 mad->src[1].swizzle = i * 0x55; 780 } 781 } 782 break; 783 case EOpMatrixTimesMatrix: 784 if(visit == PostVisit) 785 { 786 int dim = leftType.getNominalSize(); 787 788 int size = rightType.getNominalSize(); 789 for(int i = 0; i < size; i++) 790 { 791 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i); 792 mul->src[1].swizzle = 0x00; 793 794 for(int j = 1; j < dim; j++) 795 { 796 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i); 797 mad->src[1].swizzle = j * 0x55; 798 } 799 } 800 } 801 break; 802 case EOpLogicalOr: 803 if(trivial(right, 6)) 804 { 805 if(visit == PostVisit) 806 { 807 emit(sw::Shader::OPCODE_OR, result, left, right); 808 } 809 } 810 else // Short-circuit evaluation 811 { 812 if(visit == InVisit) 813 { 814 emit(sw::Shader::OPCODE_MOV, result, left); 815 Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result); 816 ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT; 817 } 818 else if(visit == PostVisit) 819 { 820 emit(sw::Shader::OPCODE_MOV, result, right); 821 emit(sw::Shader::OPCODE_ENDIF); 822 } 823 } 824 break; 825 case EOpLogicalXor: if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break; 826 case EOpLogicalAnd: 827 if(trivial(right, 6)) 828 { 829 if(visit == PostVisit) 830 { 831 emit(sw::Shader::OPCODE_AND, result, left, right); 832 } 833 } 834 else // Short-circuit evaluation 835 { 836 if(visit == InVisit) 837 { 838 emit(sw::Shader::OPCODE_MOV, result, left); 839 emit(sw::Shader::OPCODE_IF, 0, result); 840 } 841 else if(visit == PostVisit) 842 { 843 emit(sw::Shader::OPCODE_MOV, result, right); 844 emit(sw::Shader::OPCODE_ENDIF); 845 } 846 } 847 break; 848 default: UNREACHABLE(node->getOp()); 849 } 850 851 return true; 852 } 853 emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)854 void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow) 855 { 856 switch(size) 857 { 858 case 1: // Used for cofactor computation only 859 { 860 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 861 bool isMov = (row == col); 862 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG; 863 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row); 864 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col); 865 mov->dst.mask = 1 << outRow; 866 } 867 break; 868 case 2: 869 { 870 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy 871 872 bool isCofactor = (col >= 0) && (row >= 0); 873 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 874 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 875 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 876 877 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1); 878 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2]; 879 det->dst.mask = 1 << outRow; 880 } 881 break; 882 case 3: 883 { 884 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw 885 886 bool isCofactor = (col >= 0) && (row >= 0); 887 int col0 = (isCofactor && (col <= 0)) ? 1 : 0; 888 int col1 = (isCofactor && (col <= 1)) ? 2 : 1; 889 int col2 = (isCofactor && (col <= 2)) ? 3 : 2; 890 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01)); 891 892 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2); 893 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3]; 894 det->dst.mask = 1 << outRow; 895 } 896 break; 897 case 4: 898 { 899 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3); 900 det->dst.mask = 1 << outRow; 901 } 902 break; 903 default: 904 UNREACHABLE(size); 905 break; 906 } 907 } 908 visitUnary(Visit visit,TIntermUnary * node)909 bool OutputASM::visitUnary(Visit visit, TIntermUnary *node) 910 { 911 if(currentScope != emitScope) 912 { 913 return false; 914 } 915 916 TIntermTyped *result = node; 917 TIntermTyped *arg = node->getOperand(); 918 TBasicType basicType = arg->getType().getBasicType(); 919 920 union 921 { 922 float f; 923 int i; 924 } one_value; 925 926 if(basicType == EbtInt || basicType == EbtUInt) 927 { 928 one_value.i = 1; 929 } 930 else 931 { 932 one_value.f = 1.0f; 933 } 934 935 Constant one(one_value.f, one_value.f, one_value.f, one_value.f); 936 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f); 937 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f); 938 939 switch(node->getOp()) 940 { 941 case EOpNegative: 942 if(visit == PostVisit) 943 { 944 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg); 945 for(int index = 0; index < arg->totalRegisterCount(); index++) 946 { 947 emit(negOpcode, result, index, arg, index); 948 } 949 } 950 break; 951 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 952 case EOpLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 953 case EOpBitwiseNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break; 954 case EOpPostIncrement: 955 if(visit == PostVisit) 956 { 957 copy(result, arg); 958 959 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 960 for(int index = 0; index < arg->totalRegisterCount(); index++) 961 { 962 emit(addOpcode, arg, index, arg, index, &one); 963 } 964 965 assignLvalue(arg, arg); 966 } 967 break; 968 case EOpPostDecrement: 969 if(visit == PostVisit) 970 { 971 copy(result, arg); 972 973 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 974 for(int index = 0; index < arg->totalRegisterCount(); index++) 975 { 976 emit(subOpcode, arg, index, arg, index, &one); 977 } 978 979 assignLvalue(arg, arg); 980 } 981 break; 982 case EOpPreIncrement: 983 if(visit == PostVisit) 984 { 985 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg); 986 for(int index = 0; index < arg->totalRegisterCount(); index++) 987 { 988 emit(addOpcode, result, index, arg, index, &one); 989 } 990 991 assignLvalue(arg, result); 992 } 993 break; 994 case EOpPreDecrement: 995 if(visit == PostVisit) 996 { 997 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg); 998 for(int index = 0; index < arg->totalRegisterCount(); index++) 999 { 1000 emit(subOpcode, result, index, arg, index, &one); 1001 } 1002 1003 assignLvalue(arg, result); 1004 } 1005 break; 1006 case EOpRadians: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break; 1007 case EOpDegrees: if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, °); break; 1008 case EOpSin: if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break; 1009 case EOpCos: if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break; 1010 case EOpTan: if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break; 1011 case EOpAsin: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break; 1012 case EOpAcos: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break; 1013 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break; 1014 case EOpSinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break; 1015 case EOpCosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break; 1016 case EOpTanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break; 1017 case EOpAsinh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break; 1018 case EOpAcosh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break; 1019 case EOpAtanh: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break; 1020 case EOpExp: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break; 1021 case EOpLog: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break; 1022 case EOpExp2: if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break; 1023 case EOpLog2: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break; 1024 case EOpSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break; 1025 case EOpInverseSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break; 1026 case EOpAbs: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break; 1027 case EOpSign: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break; 1028 case EOpFloor: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break; 1029 case EOpTrunc: if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break; 1030 case EOpRound: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break; 1031 case EOpRoundEven: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break; 1032 case EOpCeil: if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break; 1033 case EOpFract: if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break; 1034 case EOpIsNan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break; 1035 case EOpIsInf: if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break; 1036 case EOpLength: if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break; 1037 case EOpNormalize: if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break; 1038 case EOpDFdx: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break; 1039 case EOpDFdy: if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break; 1040 case EOpFwidth: if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break; 1041 case EOpAny: if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break; 1042 case EOpAll: if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break; 1043 case EOpFloatBitsToInt: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break; 1044 case EOpFloatBitsToUint: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break; 1045 case EOpIntBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break; 1046 case EOpUintBitsToFloat: if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break; 1047 case EOpPackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break; 1048 case EOpPackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break; 1049 case EOpPackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break; 1050 case EOpUnpackSnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break; 1051 case EOpUnpackUnorm2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break; 1052 case EOpUnpackHalf2x16: if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break; 1053 case EOpTranspose: 1054 if(visit == PostVisit) 1055 { 1056 int numCols = arg->getNominalSize(); 1057 int numRows = arg->getSecondarySize(); 1058 for(int i = 0; i < numCols; ++i) 1059 { 1060 for(int j = 0; j < numRows; ++j) 1061 { 1062 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i); 1063 mov->src[0].swizzle = 0x55 * j; 1064 mov->dst.mask = 1 << i; 1065 } 1066 } 1067 } 1068 break; 1069 case EOpDeterminant: 1070 if(visit == PostVisit) 1071 { 1072 int size = arg->getNominalSize(); 1073 ASSERT(size == arg->getSecondarySize()); 1074 1075 emitDeterminant(result, arg, size); 1076 } 1077 break; 1078 case EOpInverse: 1079 if(visit == PostVisit) 1080 { 1081 int size = arg->getNominalSize(); 1082 ASSERT(size == arg->getSecondarySize()); 1083 1084 // Compute transposed matrix of cofactors 1085 for(int i = 0; i < size; ++i) 1086 { 1087 for(int j = 0; j < size; ++j) 1088 { 1089 // For a 2x2 matrix, the cofactor is simply a transposed move or negate 1090 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant 1091 emitDeterminant(result, arg, size - 1, j, i, i, j); 1092 } 1093 } 1094 1095 // Compute 1 / determinant 1096 Temporary invDet(this); 1097 emitDeterminant(&invDet, arg, size); 1098 Constant one(1.0f, 1.0f, 1.0f, 1.0f); 1099 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet); 1100 div->src[1].swizzle = 0x00; // xxxx 1101 1102 // Divide transposed matrix of cofactors by determinant 1103 for(int i = 0; i < size; ++i) 1104 { 1105 emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet); 1106 } 1107 } 1108 break; 1109 default: UNREACHABLE(node->getOp()); 1110 } 1111 1112 return true; 1113 } 1114 visitAggregate(Visit visit,TIntermAggregate * node)1115 bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node) 1116 { 1117 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence) 1118 { 1119 return false; 1120 } 1121 1122 Constant zero(0.0f, 0.0f, 0.0f, 0.0f); 1123 1124 TIntermTyped *result = node; 1125 const TType &resultType = node->getType(); 1126 TIntermSequence &arg = node->getSequence(); 1127 size_t argumentCount = arg.size(); 1128 1129 switch(node->getOp()) 1130 { 1131 case EOpSequence: break; 1132 case EOpDeclaration: break; 1133 case EOpInvariantDeclaration: break; 1134 case EOpPrototype: break; 1135 case EOpComma: 1136 if(visit == PostVisit) 1137 { 1138 copy(result, arg[1]); 1139 } 1140 break; 1141 case EOpFunction: 1142 if(visit == PreVisit) 1143 { 1144 const TString &name = node->getName(); 1145 1146 if(emitScope == FUNCTION) 1147 { 1148 if(functionArray.size() > 1) // No need for a label when there's only main() 1149 { 1150 Instruction *label = emit(sw::Shader::OPCODE_LABEL); 1151 label->dst.type = sw::Shader::PARAMETER_LABEL; 1152 1153 const Function *function = findFunction(name); 1154 ASSERT(function); // Should have been added during global pass 1155 label->dst.index = function->label; 1156 currentFunction = function->label; 1157 } 1158 } 1159 else if(emitScope == GLOBAL) 1160 { 1161 if(name != "main(") 1162 { 1163 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence(); 1164 functionArray.push_back(Function(functionArray.size(), name, &arguments, node)); 1165 } 1166 } 1167 else UNREACHABLE(emitScope); 1168 1169 currentScope = FUNCTION; 1170 } 1171 else if(visit == PostVisit) 1172 { 1173 if(emitScope == FUNCTION) 1174 { 1175 if(functionArray.size() > 1) // No need to return when there's only main() 1176 { 1177 emit(sw::Shader::OPCODE_RET); 1178 } 1179 } 1180 1181 currentScope = GLOBAL; 1182 } 1183 break; 1184 case EOpFunctionCall: 1185 if(visit == PostVisit) 1186 { 1187 if(node->isUserDefined()) 1188 { 1189 const TString &name = node->getName(); 1190 const Function *function = findFunction(name); 1191 1192 if(!function) 1193 { 1194 mContext.error(node->getLine(), "function definition not found", name.c_str()); 1195 return false; 1196 } 1197 1198 TIntermSequence &arguments = *function->arg; 1199 1200 for(size_t i = 0; i < argumentCount; i++) 1201 { 1202 TIntermTyped *in = arguments[i]->getAsTyped(); 1203 1204 if(in->getQualifier() == EvqIn || 1205 in->getQualifier() == EvqInOut || 1206 in->getQualifier() == EvqConstReadOnly) 1207 { 1208 copy(in, arg[i]); 1209 } 1210 } 1211 1212 Instruction *call = emit(sw::Shader::OPCODE_CALL); 1213 call->dst.type = sw::Shader::PARAMETER_LABEL; 1214 call->dst.index = function->label; 1215 1216 if(function->ret && function->ret->getType().getBasicType() != EbtVoid) 1217 { 1218 copy(result, function->ret); 1219 } 1220 1221 for(size_t i = 0; i < argumentCount; i++) 1222 { 1223 TIntermTyped *argument = arguments[i]->getAsTyped(); 1224 TIntermTyped *out = arg[i]->getAsTyped(); 1225 1226 if(argument->getQualifier() == EvqOut || 1227 argument->getQualifier() == EvqInOut) 1228 { 1229 assignLvalue(out, argument); 1230 } 1231 } 1232 } 1233 else 1234 { 1235 const TextureFunction textureFunction(node->getName()); 1236 TIntermTyped *t = arg[1]->getAsTyped(); 1237 1238 Temporary coord(this); 1239 1240 if(textureFunction.proj) 1241 { 1242 Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]); 1243 rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1); 1244 rcp->dst.mask = 0x7; 1245 1246 Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord); 1247 mul->dst.mask = 0x7; 1248 } 1249 else 1250 { 1251 emit(sw::Shader::OPCODE_MOV, &coord, arg[1]); 1252 } 1253 1254 switch(textureFunction.method) 1255 { 1256 case TextureFunction::IMPLICIT: 1257 { 1258 TIntermNode* offset = textureFunction.offset ? arg[2] : 0; 1259 1260 if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3)) 1261 { 1262 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX, 1263 result, &coord, arg[0], offset); 1264 } 1265 else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4)) // bias 1266 { 1267 Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]); 1268 bias->dst.mask = 0x8; 1269 1270 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX, 1271 result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction 1272 tex->bias = true; 1273 } 1274 else UNREACHABLE(argumentCount); 1275 } 1276 break; 1277 case TextureFunction::LOD: 1278 { 1279 Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]); 1280 lod->dst.mask = 0x8; 1281 1282 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL, 1283 result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr); 1284 } 1285 break; 1286 case TextureFunction::FETCH: 1287 { 1288 if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4)) 1289 { 1290 Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]); 1291 lod->dst.mask = 0x8; 1292 1293 TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr; 1294 1295 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH, 1296 result, &coord, arg[0], offset); 1297 } 1298 else UNREACHABLE(argumentCount); 1299 } 1300 break; 1301 case TextureFunction::GRAD: 1302 { 1303 if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5)) 1304 { 1305 TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr; 1306 1307 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD, 1308 result, &coord, arg[0], arg[2], arg[3], offset); 1309 } 1310 else UNREACHABLE(argumentCount); 1311 } 1312 break; 1313 case TextureFunction::SIZE: 1314 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]); 1315 break; 1316 default: 1317 UNREACHABLE(textureFunction.method); 1318 } 1319 } 1320 } 1321 break; 1322 case EOpParameters: 1323 break; 1324 case EOpConstructFloat: 1325 case EOpConstructVec2: 1326 case EOpConstructVec3: 1327 case EOpConstructVec4: 1328 case EOpConstructBool: 1329 case EOpConstructBVec2: 1330 case EOpConstructBVec3: 1331 case EOpConstructBVec4: 1332 case EOpConstructInt: 1333 case EOpConstructIVec2: 1334 case EOpConstructIVec3: 1335 case EOpConstructIVec4: 1336 case EOpConstructUInt: 1337 case EOpConstructUVec2: 1338 case EOpConstructUVec3: 1339 case EOpConstructUVec4: 1340 if(visit == PostVisit) 1341 { 1342 int component = 0; 1343 int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0; 1344 int arrayComponents = result->getType().getElementSize(); 1345 for(size_t i = 0; i < argumentCount; i++) 1346 { 1347 TIntermTyped *argi = arg[i]->getAsTyped(); 1348 int size = argi->getNominalSize(); 1349 int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex); 1350 int swizzle = component - (arrayIndex * arrayComponents); 1351 1352 if(!argi->isMatrix()) 1353 { 1354 Instruction *mov = emitCast(result, arrayIndex, argi, 0); 1355 mov->dst.mask = (0xF << swizzle) & 0xF; 1356 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1357 1358 component += size; 1359 } 1360 else // Matrix 1361 { 1362 int column = 0; 1363 1364 while(component < resultType.getNominalSize()) 1365 { 1366 Instruction *mov = emitCast(result, arrayIndex, argi, column); 1367 mov->dst.mask = (0xF << swizzle) & 0xF; 1368 mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2); 1369 1370 column++; 1371 component += size; 1372 } 1373 } 1374 } 1375 } 1376 break; 1377 case EOpConstructMat2: 1378 case EOpConstructMat2x3: 1379 case EOpConstructMat2x4: 1380 case EOpConstructMat3x2: 1381 case EOpConstructMat3: 1382 case EOpConstructMat3x4: 1383 case EOpConstructMat4x2: 1384 case EOpConstructMat4x3: 1385 case EOpConstructMat4: 1386 if(visit == PostVisit) 1387 { 1388 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1389 const int outCols = result->getNominalSize(); 1390 const int outRows = result->getSecondarySize(); 1391 1392 if(arg0->isScalar() && arg.size() == 1) // Construct scale matrix 1393 { 1394 for(int i = 0; i < outCols; i++) 1395 { 1396 emit(sw::Shader::OPCODE_MOV, result, i, &zero); 1397 Instruction *mov = emitCast(result, i, arg0, 0); 1398 mov->dst.mask = 1 << i; 1399 ASSERT(mov->src[0].swizzle == 0x00); 1400 } 1401 } 1402 else if(arg0->isMatrix()) 1403 { 1404 int arraySize = result->isArray() ? result->getArraySize() : 1; 1405 1406 for(int n = 0; n < arraySize; n++) 1407 { 1408 TIntermTyped *argi = arg[n]->getAsTyped(); 1409 const int inCols = argi->getNominalSize(); 1410 const int inRows = argi->getSecondarySize(); 1411 1412 for(int i = 0; i < outCols; i++) 1413 { 1414 if(i >= inCols || outRows > inRows) 1415 { 1416 // Initialize to identity matrix 1417 Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f)); 1418 emitCast(result, i + n * outCols, &col, 0); 1419 } 1420 1421 if(i < inCols) 1422 { 1423 Instruction *mov = emitCast(result, i + n * outCols, argi, i); 1424 mov->dst.mask = 0xF >> (4 - inRows); 1425 } 1426 } 1427 } 1428 } 1429 else 1430 { 1431 int column = 0; 1432 int row = 0; 1433 1434 for(size_t i = 0; i < argumentCount; i++) 1435 { 1436 TIntermTyped *argi = arg[i]->getAsTyped(); 1437 int size = argi->getNominalSize(); 1438 int element = 0; 1439 1440 while(element < size) 1441 { 1442 Instruction *mov = emitCast(result, column, argi, 0); 1443 mov->dst.mask = (0xF << row) & 0xF; 1444 mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element; 1445 1446 int end = row + size - element; 1447 column = end >= outRows ? column + 1 : column; 1448 element = element + outRows - row; 1449 row = end >= outRows ? 0 : end; 1450 } 1451 } 1452 } 1453 } 1454 break; 1455 case EOpConstructStruct: 1456 if(visit == PostVisit) 1457 { 1458 int offset = 0; 1459 for(size_t i = 0; i < argumentCount; i++) 1460 { 1461 TIntermTyped *argi = arg[i]->getAsTyped(); 1462 int size = argi->totalRegisterCount(); 1463 1464 for(int index = 0; index < size; index++) 1465 { 1466 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index); 1467 mov->dst.mask = writeMask(result, offset + index); 1468 } 1469 1470 offset += size; 1471 } 1472 } 1473 break; 1474 case EOpLessThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break; 1475 case EOpGreaterThan: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break; 1476 case EOpLessThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break; 1477 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break; 1478 case EOpVectorEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break; 1479 case EOpVectorNotEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break; 1480 case EOpMod: if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break; 1481 case EOpModf: 1482 if(visit == PostVisit) 1483 { 1484 TIntermTyped* arg1 = arg[1]->getAsTyped(); 1485 emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]); 1486 assignLvalue(arg1, arg1); 1487 emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1); 1488 } 1489 break; 1490 case EOpPow: if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break; 1491 case EOpAtan: if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break; 1492 case EOpMin: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break; 1493 case EOpMax: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break; 1494 case EOpClamp: 1495 if(visit == PostVisit) 1496 { 1497 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); 1498 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]); 1499 } 1500 break; 1501 case EOpMix: if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break; 1502 case EOpStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break; 1503 case EOpSmoothStep: if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break; 1504 case EOpDistance: if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break; 1505 case EOpDot: if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break; 1506 case EOpCross: if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break; 1507 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1508 case EOpReflect: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break; 1509 case EOpRefract: if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break; 1510 case EOpMul: 1511 if(visit == PostVisit) 1512 { 1513 TIntermTyped *arg0 = arg[0]->getAsTyped(); 1514 ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) && 1515 (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize())); 1516 1517 int size = arg0->getNominalSize(); 1518 for(int i = 0; i < size; i++) 1519 { 1520 emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i); 1521 } 1522 } 1523 break; 1524 case EOpOuterProduct: 1525 if(visit == PostVisit) 1526 { 1527 for(int i = 0; i < dim(arg[1]); i++) 1528 { 1529 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]); 1530 mul->src[1].swizzle = 0x55 * i; 1531 } 1532 } 1533 break; 1534 default: UNREACHABLE(node->getOp()); 1535 } 1536 1537 return true; 1538 } 1539 visitSelection(Visit visit,TIntermSelection * node)1540 bool OutputASM::visitSelection(Visit visit, TIntermSelection *node) 1541 { 1542 if(currentScope != emitScope) 1543 { 1544 return false; 1545 } 1546 1547 TIntermTyped *condition = node->getCondition(); 1548 TIntermNode *trueBlock = node->getTrueBlock(); 1549 TIntermNode *falseBlock = node->getFalseBlock(); 1550 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 1551 1552 condition->traverse(this); 1553 1554 if(node->usesTernaryOperator()) 1555 { 1556 if(constantCondition) 1557 { 1558 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1559 1560 if(trueCondition) 1561 { 1562 trueBlock->traverse(this); 1563 copy(node, trueBlock); 1564 } 1565 else 1566 { 1567 falseBlock->traverse(this); 1568 copy(node, falseBlock); 1569 } 1570 } 1571 else if(trivial(node, 6)) // Fast to compute both potential results and no side effects 1572 { 1573 trueBlock->traverse(this); 1574 falseBlock->traverse(this); 1575 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock); 1576 } 1577 else 1578 { 1579 emit(sw::Shader::OPCODE_IF, 0, condition); 1580 1581 if(trueBlock) 1582 { 1583 trueBlock->traverse(this); 1584 copy(node, trueBlock); 1585 } 1586 1587 if(falseBlock) 1588 { 1589 emit(sw::Shader::OPCODE_ELSE); 1590 falseBlock->traverse(this); 1591 copy(node, falseBlock); 1592 } 1593 1594 emit(sw::Shader::OPCODE_ENDIF); 1595 } 1596 } 1597 else // if/else statement 1598 { 1599 if(constantCondition) 1600 { 1601 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 1602 1603 if(trueCondition) 1604 { 1605 if(trueBlock) 1606 { 1607 trueBlock->traverse(this); 1608 } 1609 } 1610 else 1611 { 1612 if(falseBlock) 1613 { 1614 falseBlock->traverse(this); 1615 } 1616 } 1617 } 1618 else 1619 { 1620 emit(sw::Shader::OPCODE_IF, 0, condition); 1621 1622 if(trueBlock) 1623 { 1624 trueBlock->traverse(this); 1625 } 1626 1627 if(falseBlock) 1628 { 1629 emit(sw::Shader::OPCODE_ELSE); 1630 falseBlock->traverse(this); 1631 } 1632 1633 emit(sw::Shader::OPCODE_ENDIF); 1634 } 1635 } 1636 1637 return false; 1638 } 1639 visitLoop(Visit visit,TIntermLoop * node)1640 bool OutputASM::visitLoop(Visit visit, TIntermLoop *node) 1641 { 1642 if(currentScope != emitScope) 1643 { 1644 return false; 1645 } 1646 1647 unsigned int iterations = loopCount(node); 1648 1649 if(iterations == 0) 1650 { 1651 return false; 1652 } 1653 1654 bool unroll = (iterations <= 4); 1655 1656 if(unroll) 1657 { 1658 LoopUnrollable loopUnrollable; 1659 unroll = loopUnrollable.traverse(node); 1660 } 1661 1662 TIntermNode *init = node->getInit(); 1663 TIntermTyped *condition = node->getCondition(); 1664 TIntermTyped *expression = node->getExpression(); 1665 TIntermNode *body = node->getBody(); 1666 Constant True(true); 1667 1668 if(node->getType() == ELoopDoWhile) 1669 { 1670 Temporary iterate(this); 1671 emit(sw::Shader::OPCODE_MOV, &iterate, &True); 1672 1673 emit(sw::Shader::OPCODE_WHILE, 0, &iterate); // FIXME: Implement real do-while 1674 1675 if(body) 1676 { 1677 body->traverse(this); 1678 } 1679 1680 emit(sw::Shader::OPCODE_TEST); 1681 1682 condition->traverse(this); 1683 emit(sw::Shader::OPCODE_MOV, &iterate, condition); 1684 1685 emit(sw::Shader::OPCODE_ENDWHILE); 1686 } 1687 else 1688 { 1689 if(init) 1690 { 1691 init->traverse(this); 1692 } 1693 1694 if(unroll) 1695 { 1696 for(unsigned int i = 0; i < iterations; i++) 1697 { 1698 // condition->traverse(this); // Condition could contain statements, but not in an unrollable loop 1699 1700 if(body) 1701 { 1702 body->traverse(this); 1703 } 1704 1705 if(expression) 1706 { 1707 expression->traverse(this); 1708 } 1709 } 1710 } 1711 else 1712 { 1713 if(condition) 1714 { 1715 condition->traverse(this); 1716 } 1717 else 1718 { 1719 condition = &True; 1720 } 1721 1722 emit(sw::Shader::OPCODE_WHILE, 0, condition); 1723 1724 if(body) 1725 { 1726 body->traverse(this); 1727 } 1728 1729 emit(sw::Shader::OPCODE_TEST); 1730 1731 if(expression) 1732 { 1733 expression->traverse(this); 1734 } 1735 1736 if(condition) 1737 { 1738 condition->traverse(this); 1739 } 1740 1741 emit(sw::Shader::OPCODE_ENDWHILE); 1742 } 1743 } 1744 1745 return false; 1746 } 1747 visitBranch(Visit visit,TIntermBranch * node)1748 bool OutputASM::visitBranch(Visit visit, TIntermBranch *node) 1749 { 1750 if(currentScope != emitScope) 1751 { 1752 return false; 1753 } 1754 1755 switch(node->getFlowOp()) 1756 { 1757 case EOpKill: if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD); break; 1758 case EOpBreak: if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK); break; 1759 case EOpContinue: if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break; 1760 case EOpReturn: 1761 if(visit == PostVisit) 1762 { 1763 TIntermTyped *value = node->getExpression(); 1764 1765 if(value) 1766 { 1767 copy(functionArray[currentFunction].ret, value); 1768 } 1769 1770 emit(sw::Shader::OPCODE_LEAVE); 1771 } 1772 break; 1773 default: UNREACHABLE(node->getFlowOp()); 1774 } 1775 1776 return true; 1777 } 1778 visitSwitch(Visit visit,TIntermSwitch * node)1779 bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node) 1780 { 1781 if(currentScope != emitScope) 1782 { 1783 return false; 1784 } 1785 1786 TIntermTyped* switchValue = node->getInit(); 1787 TIntermAggregate* opList = node->getStatementList(); 1788 1789 if(!switchValue || !opList) 1790 { 1791 return false; 1792 } 1793 1794 switchValue->traverse(this); 1795 1796 emit(sw::Shader::OPCODE_SWITCH); 1797 1798 TIntermSequence& sequence = opList->getSequence(); 1799 TIntermSequence::iterator it = sequence.begin(); 1800 TIntermSequence::iterator defaultIt = sequence.end(); 1801 int nbCases = 0; 1802 for(; it != sequence.end(); ++it) 1803 { 1804 TIntermCase* currentCase = (*it)->getAsCaseNode(); 1805 if(currentCase) 1806 { 1807 TIntermSequence::iterator caseIt = it; 1808 1809 TIntermTyped* condition = currentCase->getCondition(); 1810 if(condition) // non default case 1811 { 1812 if(nbCases != 0) 1813 { 1814 emit(sw::Shader::OPCODE_ELSE); 1815 } 1816 1817 condition->traverse(this); 1818 Temporary result(this); 1819 emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition); 1820 emit(sw::Shader::OPCODE_IF, 0, &result); 1821 nbCases++; 1822 1823 for(++caseIt; caseIt != sequence.end(); ++caseIt) 1824 { 1825 (*caseIt)->traverse(this); 1826 if((*caseIt)->getAsBranchNode()) // Kill, Break, Continue or Return 1827 { 1828 break; 1829 } 1830 } 1831 } 1832 else 1833 { 1834 defaultIt = it; // The default case might not be the last case, keep it for last 1835 } 1836 } 1837 } 1838 1839 // If there's a default case, traverse it here 1840 if(defaultIt != sequence.end()) 1841 { 1842 emit(sw::Shader::OPCODE_ELSE); 1843 for(++defaultIt; defaultIt != sequence.end(); ++defaultIt) 1844 { 1845 (*defaultIt)->traverse(this); 1846 if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return 1847 { 1848 break; 1849 } 1850 } 1851 } 1852 1853 for(int i = 0; i < nbCases; ++i) 1854 { 1855 emit(sw::Shader::OPCODE_ENDIF); 1856 } 1857 1858 emit(sw::Shader::OPCODE_ENDSWITCH); 1859 1860 return false; 1861 } 1862 emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)1863 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4) 1864 { 1865 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0); 1866 } 1867 emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)1868 Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1, 1869 TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4) 1870 { 1871 Instruction *instruction = new Instruction(op); 1872 1873 if(dst) 1874 { 1875 instruction->dst.type = registerType(dst); 1876 instruction->dst.index = registerIndex(dst) + dstIndex; 1877 instruction->dst.mask = writeMask(dst); 1878 instruction->dst.integer = (dst->getBasicType() == EbtInt); 1879 } 1880 1881 argument(instruction->src[0], src0, index0); 1882 argument(instruction->src[1], src1, index1); 1883 argument(instruction->src[2], src2, index2); 1884 argument(instruction->src[3], src3, index3); 1885 argument(instruction->src[4], src4, index4); 1886 1887 shader->append(instruction); 1888 1889 return instruction; 1890 } 1891 emitCast(TIntermTyped * dst,TIntermTyped * src)1892 Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src) 1893 { 1894 return emitCast(dst, 0, src, 0); 1895 } 1896 emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)1897 Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex) 1898 { 1899 switch(src->getBasicType()) 1900 { 1901 case EbtBool: 1902 switch(dst->getBasicType()) 1903 { 1904 case EbtInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 1905 case EbtUInt: return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex); 1906 case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex); 1907 default: break; 1908 } 1909 break; 1910 case EbtInt: 1911 switch(dst->getBasicType()) 1912 { 1913 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 1914 case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex); 1915 default: break; 1916 } 1917 break; 1918 case EbtUInt: 1919 switch(dst->getBasicType()) 1920 { 1921 case EbtBool: return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex); 1922 case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex); 1923 default: break; 1924 } 1925 break; 1926 case EbtFloat: 1927 switch(dst->getBasicType()) 1928 { 1929 case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex); 1930 case EbtInt: return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex); 1931 case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex); 1932 default: break; 1933 } 1934 break; 1935 default: 1936 break; 1937 } 1938 1939 ASSERT((src->getBasicType() == dst->getBasicType()) || 1940 ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) || 1941 ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt))); 1942 1943 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex); 1944 } 1945 emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)1946 void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2) 1947 { 1948 for(int index = 0; index < dst->elementRegisterCount(); index++) 1949 { 1950 emit(op, dst, index, src0, index, src1, index, src2, index); 1951 } 1952 } 1953 emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)1954 void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1) 1955 { 1956 emitBinary(op, result, src0, src1); 1957 assignLvalue(lhs, result); 1958 } 1959 emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)1960 void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index) 1961 { 1962 sw::Shader::Opcode opcode; 1963 switch(left->getAsTyped()->getBasicType()) 1964 { 1965 case EbtBool: 1966 case EbtInt: 1967 opcode = sw::Shader::OPCODE_ICMP; 1968 break; 1969 case EbtUInt: 1970 opcode = sw::Shader::OPCODE_UCMP; 1971 break; 1972 default: 1973 opcode = sw::Shader::OPCODE_CMP; 1974 break; 1975 } 1976 1977 Instruction *cmp = emit(opcode, dst, 0, left, index, right, index); 1978 cmp->control = cmpOp; 1979 } 1980 componentCount(const TType & type,int registers)1981 int componentCount(const TType &type, int registers) 1982 { 1983 if(registers == 0) 1984 { 1985 return 0; 1986 } 1987 1988 if(type.isArray() && registers >= type.elementRegisterCount()) 1989 { 1990 int index = registers / type.elementRegisterCount(); 1991 registers -= index * type.elementRegisterCount(); 1992 return index * type.getElementSize() + componentCount(type, registers); 1993 } 1994 1995 if(type.isStruct() || type.isInterfaceBlock()) 1996 { 1997 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 1998 int elements = 0; 1999 2000 for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++) 2001 { 2002 const TType &fieldType = *((*field)->type()); 2003 2004 if(fieldType.totalRegisterCount() <= registers) 2005 { 2006 registers -= fieldType.totalRegisterCount(); 2007 elements += fieldType.getObjectSize(); 2008 } 2009 else // Register within this field 2010 { 2011 return elements + componentCount(fieldType, registers); 2012 } 2013 } 2014 } 2015 else if(type.isMatrix()) 2016 { 2017 return registers * type.registerSize(); 2018 } 2019 2020 UNREACHABLE(0); 2021 return 0; 2022 } 2023 registerSize(const TType & type,int registers)2024 int registerSize(const TType &type, int registers) 2025 { 2026 if(registers == 0) 2027 { 2028 if(type.isStruct()) 2029 { 2030 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0); 2031 } 2032 else if(type.isInterfaceBlock()) 2033 { 2034 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0); 2035 } 2036 2037 return type.registerSize(); 2038 } 2039 2040 if(type.isArray() && registers >= type.elementRegisterCount()) 2041 { 2042 int index = registers / type.elementRegisterCount(); 2043 registers -= index * type.elementRegisterCount(); 2044 return registerSize(type, registers); 2045 } 2046 2047 if(type.isStruct() || type.isInterfaceBlock()) 2048 { 2049 const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields(); 2050 int elements = 0; 2051 2052 for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++) 2053 { 2054 const TType &fieldType = *((*field)->type()); 2055 2056 if(fieldType.totalRegisterCount() <= registers) 2057 { 2058 registers -= fieldType.totalRegisterCount(); 2059 elements += fieldType.getObjectSize(); 2060 } 2061 else // Register within this field 2062 { 2063 return registerSize(fieldType, registers); 2064 } 2065 } 2066 } 2067 else if(type.isMatrix()) 2068 { 2069 return registerSize(type, 0); 2070 } 2071 2072 UNREACHABLE(0); 2073 return 0; 2074 } 2075 getBlockId(TIntermTyped * arg)2076 int OutputASM::getBlockId(TIntermTyped *arg) 2077 { 2078 if(arg) 2079 { 2080 const TType &type = arg->getType(); 2081 TInterfaceBlock* block = type.getInterfaceBlock(); 2082 if(block && (type.getQualifier() == EvqUniform)) 2083 { 2084 // Make sure the uniform block is declared 2085 uniformRegister(arg); 2086 2087 const char* blockName = block->name().c_str(); 2088 2089 // Fetch uniform block index from array of blocks 2090 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it) 2091 { 2092 if(blockName == it->name) 2093 { 2094 return it->blockId; 2095 } 2096 } 2097 2098 ASSERT(false); 2099 } 2100 } 2101 2102 return -1; 2103 } 2104 getArgumentInfo(TIntermTyped * arg,int index)2105 OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index) 2106 { 2107 const TType &type = arg->getType(); 2108 int blockId = getBlockId(arg); 2109 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1); 2110 if(blockId != -1) 2111 { 2112 argumentInfo.bufferIndex = 0; 2113 for(int i = 0; i < blockId; ++i) 2114 { 2115 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize; 2116 argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1; 2117 } 2118 2119 const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId]; 2120 2121 BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end(); 2122 BlockDefinitionIndexMap::const_iterator it = itEnd; 2123 2124 argumentInfo.clampedIndex = index; 2125 if(type.isInterfaceBlock()) 2126 { 2127 // Offset index to the beginning of the selected instance 2128 int blockRegisters = type.elementRegisterCount(); 2129 int bufferOffset = argumentInfo.clampedIndex / blockRegisters; 2130 argumentInfo.bufferIndex += bufferOffset; 2131 argumentInfo.clampedIndex -= bufferOffset * blockRegisters; 2132 } 2133 2134 int regIndex = registerIndex(arg); 2135 for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i) 2136 { 2137 it = blockDefinition.find(i); 2138 if(it != itEnd) 2139 { 2140 argumentInfo.clampedIndex -= (i - regIndex); 2141 break; 2142 } 2143 } 2144 ASSERT(it != itEnd); 2145 2146 argumentInfo.typedMemberInfo = it->second; 2147 2148 int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount(); 2149 argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex; 2150 } 2151 else 2152 { 2153 argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index; 2154 } 2155 2156 return argumentInfo; 2157 } 2158 argument(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2159 void OutputASM::argument(sw::Shader::SourceParameter ¶meter, TIntermNode *argument, int index) 2160 { 2161 if(argument) 2162 { 2163 TIntermTyped *arg = argument->getAsTyped(); 2164 Temporary unpackedUniform(this); 2165 2166 const TType& srcType = arg->getType(); 2167 TInterfaceBlock* srcBlock = srcType.getInterfaceBlock(); 2168 if(srcBlock && (srcType.getQualifier() == EvqUniform)) 2169 { 2170 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2171 const TType &memberType = argumentInfo.typedMemberInfo.type; 2172 2173 if(memberType.getBasicType() == EbtBool) 2174 { 2175 ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize 2176 2177 // Convert the packed bool, which is currently an int, to a true bool 2178 Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B); 2179 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2180 instruction->dst.index = registerIndex(&unpackedUniform); 2181 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2182 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2183 instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride; 2184 2185 shader->append(instruction); 2186 2187 arg = &unpackedUniform; 2188 index = 0; 2189 } 2190 else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix()) 2191 { 2192 int numCols = memberType.getNominalSize(); 2193 int numRows = memberType.getSecondarySize(); 2194 2195 ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize 2196 2197 unsigned int dstIndex = registerIndex(&unpackedUniform); 2198 unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55; 2199 int arrayIndex = argumentInfo.clampedIndex / numCols; 2200 int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride; 2201 2202 for(int j = 0; j < numRows; ++j) 2203 { 2204 // Transpose the row major matrix 2205 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV); 2206 instruction->dst.type = sw::Shader::PARAMETER_TEMP; 2207 instruction->dst.index = dstIndex; 2208 instruction->dst.mask = 1 << j; 2209 instruction->src[0].type = sw::Shader::PARAMETER_CONST; 2210 instruction->src[0].bufferIndex = argumentInfo.bufferIndex; 2211 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride; 2212 instruction->src[0].swizzle = srcSwizzle; 2213 2214 shader->append(instruction); 2215 } 2216 2217 arg = &unpackedUniform; 2218 index = 0; 2219 } 2220 } 2221 2222 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index); 2223 const TType &type = argumentInfo.typedMemberInfo.type; 2224 2225 int size = registerSize(type, argumentInfo.clampedIndex); 2226 2227 parameter.type = registerType(arg); 2228 parameter.bufferIndex = argumentInfo.bufferIndex; 2229 2230 if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer()) 2231 { 2232 int component = componentCount(type, argumentInfo.clampedIndex); 2233 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer(); 2234 2235 for(int i = 0; i < 4; i++) 2236 { 2237 if(size == 1) // Replicate 2238 { 2239 parameter.value[i] = constants[component + 0].getAsFloat(); 2240 } 2241 else if(i < size) 2242 { 2243 parameter.value[i] = constants[component + i].getAsFloat(); 2244 } 2245 else 2246 { 2247 parameter.value[i] = 0.0f; 2248 } 2249 } 2250 } 2251 else 2252 { 2253 parameter.index = registerIndex(arg) + argumentInfo.clampedIndex; 2254 2255 if(parameter.bufferIndex != -1) 2256 { 2257 int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride; 2258 parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride; 2259 } 2260 } 2261 2262 if(!IsSampler(arg->getBasicType())) 2263 { 2264 parameter.swizzle = readSwizzle(arg, size); 2265 } 2266 } 2267 } 2268 copy(TIntermTyped * dst,TIntermNode * src,int offset)2269 void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset) 2270 { 2271 for(int index = 0; index < dst->totalRegisterCount(); index++) 2272 { 2273 Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index); 2274 mov->dst.mask = writeMask(dst, index); 2275 } 2276 } 2277 swizzleElement(int swizzle,int index)2278 int swizzleElement(int swizzle, int index) 2279 { 2280 return (swizzle >> (index * 2)) & 0x03; 2281 } 2282 swizzleSwizzle(int leftSwizzle,int rightSwizzle)2283 int swizzleSwizzle(int leftSwizzle, int rightSwizzle) 2284 { 2285 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) | 2286 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) | 2287 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) | 2288 (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6); 2289 } 2290 assignLvalue(TIntermTyped * dst,TIntermTyped * src)2291 void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src) 2292 { 2293 if(src && 2294 ((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) || 2295 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))) 2296 { 2297 return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix"); 2298 } 2299 2300 TIntermBinary *binary = dst->getAsBinaryNode(); 2301 2302 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar()) 2303 { 2304 Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT); 2305 2306 Temporary address(this); 2307 lvalue(insert->dst, address, dst); 2308 2309 insert->src[0].type = insert->dst.type; 2310 insert->src[0].index = insert->dst.index; 2311 insert->src[0].rel = insert->dst.rel; 2312 argument(insert->src[1], src); 2313 argument(insert->src[2], binary->getRight()); 2314 2315 shader->append(insert); 2316 } 2317 else 2318 { 2319 for(int offset = 0; offset < dst->totalRegisterCount(); offset++) 2320 { 2321 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV); 2322 2323 Temporary address(this); 2324 int swizzle = lvalue(mov->dst, address, dst); 2325 mov->dst.index += offset; 2326 2327 if(offset > 0) 2328 { 2329 mov->dst.mask = writeMask(dst, offset); 2330 } 2331 2332 argument(mov->src[0], src, offset); 2333 mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle); 2334 2335 shader->append(mov); 2336 } 2337 } 2338 } 2339 lvalue(sw::Shader::DestinationParameter & dst,Temporary & address,TIntermTyped * node)2340 int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node) 2341 { 2342 TIntermTyped *result = node; 2343 TIntermBinary *binary = node->getAsBinaryNode(); 2344 TIntermSymbol *symbol = node->getAsSymbolNode(); 2345 2346 if(binary) 2347 { 2348 TIntermTyped *left = binary->getLeft(); 2349 TIntermTyped *right = binary->getRight(); 2350 2351 int leftSwizzle = lvalue(dst, address, left); // Resolve the l-value of the left side 2352 2353 switch(binary->getOp()) 2354 { 2355 case EOpIndexDirect: 2356 { 2357 int rightIndex = right->getAsConstantUnion()->getIConst(0); 2358 2359 if(left->isRegister()) 2360 { 2361 int leftMask = dst.mask; 2362 2363 dst.mask = 1; 2364 while((leftMask & dst.mask) == 0) 2365 { 2366 dst.mask = dst.mask << 1; 2367 } 2368 2369 int element = swizzleElement(leftSwizzle, rightIndex); 2370 dst.mask = 1 << element; 2371 2372 return element; 2373 } 2374 else if(left->isArray() || left->isMatrix()) 2375 { 2376 dst.index += rightIndex * result->totalRegisterCount(); 2377 return 0xE4; 2378 } 2379 else UNREACHABLE(0); 2380 } 2381 break; 2382 case EOpIndexIndirect: 2383 { 2384 if(left->isRegister()) 2385 { 2386 // Requires INSERT instruction (handled by calling function) 2387 } 2388 else if(left->isArray() || left->isMatrix()) 2389 { 2390 int scale = result->totalRegisterCount(); 2391 2392 if(dst.rel.type == sw::Shader::PARAMETER_VOID) // Use the index register as the relative address directly 2393 { 2394 if(left->totalRegisterCount() > 1) 2395 { 2396 sw::Shader::SourceParameter relativeRegister; 2397 argument(relativeRegister, right); 2398 2399 dst.rel.index = relativeRegister.index; 2400 dst.rel.type = relativeRegister.type; 2401 dst.rel.scale = scale; 2402 dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform); 2403 } 2404 } 2405 else if(dst.rel.index != registerIndex(&address)) // Move the previous index register to the address register 2406 { 2407 if(scale == 1) 2408 { 2409 Constant oldScale((int)dst.rel.scale); 2410 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right); 2411 mad->src[0].index = dst.rel.index; 2412 mad->src[0].type = dst.rel.type; 2413 } 2414 else 2415 { 2416 Constant oldScale((int)dst.rel.scale); 2417 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale); 2418 mul->src[0].index = dst.rel.index; 2419 mul->src[0].type = dst.rel.type; 2420 2421 Constant newScale(scale); 2422 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2423 } 2424 2425 dst.rel.type = sw::Shader::PARAMETER_TEMP; 2426 dst.rel.index = registerIndex(&address); 2427 dst.rel.scale = 1; 2428 } 2429 else // Just add the new index to the address register 2430 { 2431 if(scale == 1) 2432 { 2433 emit(sw::Shader::OPCODE_IADD, &address, &address, right); 2434 } 2435 else 2436 { 2437 Constant newScale(scale); 2438 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address); 2439 } 2440 } 2441 } 2442 else UNREACHABLE(0); 2443 } 2444 break; 2445 case EOpIndexDirectStruct: 2446 case EOpIndexDirectInterfaceBlock: 2447 { 2448 const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ? 2449 left->getType().getStruct()->fields() : 2450 left->getType().getInterfaceBlock()->fields(); 2451 int index = right->getAsConstantUnion()->getIConst(0); 2452 int fieldOffset = 0; 2453 2454 for(int i = 0; i < index; i++) 2455 { 2456 fieldOffset += fields[i]->type()->totalRegisterCount(); 2457 } 2458 2459 dst.type = registerType(left); 2460 dst.index += fieldOffset; 2461 dst.mask = writeMask(result); 2462 2463 return 0xE4; 2464 } 2465 break; 2466 case EOpVectorSwizzle: 2467 { 2468 ASSERT(left->isRegister()); 2469 2470 int leftMask = dst.mask; 2471 2472 int swizzle = 0; 2473 int rightMask = 0; 2474 2475 TIntermSequence &sequence = right->getAsAggregate()->getSequence(); 2476 2477 for(unsigned int i = 0; i < sequence.size(); i++) 2478 { 2479 int index = sequence[i]->getAsConstantUnion()->getIConst(0); 2480 2481 int element = swizzleElement(leftSwizzle, index); 2482 rightMask = rightMask | (1 << element); 2483 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2); 2484 } 2485 2486 dst.mask = leftMask & rightMask; 2487 2488 return swizzle; 2489 } 2490 break; 2491 default: 2492 UNREACHABLE(binary->getOp()); // Not an l-value operator 2493 break; 2494 } 2495 } 2496 else if(symbol) 2497 { 2498 dst.type = registerType(symbol); 2499 dst.index = registerIndex(symbol); 2500 dst.mask = writeMask(symbol); 2501 return 0xE4; 2502 } 2503 2504 return 0xE4; 2505 } 2506 registerType(TIntermTyped * operand)2507 sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand) 2508 { 2509 if(isSamplerRegister(operand)) 2510 { 2511 return sw::Shader::PARAMETER_SAMPLER; 2512 } 2513 2514 const TQualifier qualifier = operand->getQualifier(); 2515 if((EvqFragColor == qualifier) || (EvqFragData == qualifier)) 2516 { 2517 if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) || 2518 ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier))) 2519 { 2520 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", ""); 2521 } 2522 outputQualifier = qualifier; 2523 } 2524 2525 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer())) 2526 { 2527 return sw::Shader::PARAMETER_TEMP; 2528 } 2529 2530 switch(qualifier) 2531 { 2532 case EvqTemporary: return sw::Shader::PARAMETER_TEMP; 2533 case EvqGlobal: return sw::Shader::PARAMETER_TEMP; 2534 case EvqConstExpr: return sw::Shader::PARAMETER_FLOAT4LITERAL; // All converted to float 2535 case EvqAttribute: return sw::Shader::PARAMETER_INPUT; 2536 case EvqVaryingIn: return sw::Shader::PARAMETER_INPUT; 2537 case EvqVaryingOut: return sw::Shader::PARAMETER_OUTPUT; 2538 case EvqVertexIn: return sw::Shader::PARAMETER_INPUT; 2539 case EvqFragmentOut: return sw::Shader::PARAMETER_COLOROUT; 2540 case EvqVertexOut: return sw::Shader::PARAMETER_OUTPUT; 2541 case EvqFragmentIn: return sw::Shader::PARAMETER_INPUT; 2542 case EvqInvariantVaryingIn: return sw::Shader::PARAMETER_INPUT; // FIXME: Guarantee invariance at the backend 2543 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT; // FIXME: Guarantee invariance at the backend 2544 case EvqSmooth: return sw::Shader::PARAMETER_OUTPUT; 2545 case EvqFlat: return sw::Shader::PARAMETER_OUTPUT; 2546 case EvqCentroidOut: return sw::Shader::PARAMETER_OUTPUT; 2547 case EvqSmoothIn: return sw::Shader::PARAMETER_INPUT; 2548 case EvqFlatIn: return sw::Shader::PARAMETER_INPUT; 2549 case EvqCentroidIn: return sw::Shader::PARAMETER_INPUT; 2550 case EvqUniform: return sw::Shader::PARAMETER_CONST; 2551 case EvqIn: return sw::Shader::PARAMETER_TEMP; 2552 case EvqOut: return sw::Shader::PARAMETER_TEMP; 2553 case EvqInOut: return sw::Shader::PARAMETER_TEMP; 2554 case EvqConstReadOnly: return sw::Shader::PARAMETER_TEMP; 2555 case EvqPosition: return sw::Shader::PARAMETER_OUTPUT; 2556 case EvqPointSize: return sw::Shader::PARAMETER_OUTPUT; 2557 case EvqInstanceID: return sw::Shader::PARAMETER_MISCTYPE; 2558 case EvqVertexID: return sw::Shader::PARAMETER_MISCTYPE; 2559 case EvqFragCoord: return sw::Shader::PARAMETER_MISCTYPE; 2560 case EvqFrontFacing: return sw::Shader::PARAMETER_MISCTYPE; 2561 case EvqPointCoord: return sw::Shader::PARAMETER_INPUT; 2562 case EvqFragColor: return sw::Shader::PARAMETER_COLOROUT; 2563 case EvqFragData: return sw::Shader::PARAMETER_COLOROUT; 2564 case EvqFragDepth: return sw::Shader::PARAMETER_DEPTHOUT; 2565 default: UNREACHABLE(qualifier); 2566 } 2567 2568 return sw::Shader::PARAMETER_VOID; 2569 } 2570 hasFlatQualifier(TIntermTyped * operand)2571 bool OutputASM::hasFlatQualifier(TIntermTyped *operand) 2572 { 2573 const TQualifier qualifier = operand->getQualifier(); 2574 return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn; 2575 } 2576 registerIndex(TIntermTyped * operand)2577 unsigned int OutputASM::registerIndex(TIntermTyped *operand) 2578 { 2579 if(isSamplerRegister(operand)) 2580 { 2581 return samplerRegister(operand); 2582 } 2583 2584 switch(operand->getQualifier()) 2585 { 2586 case EvqTemporary: return temporaryRegister(operand); 2587 case EvqGlobal: return temporaryRegister(operand); 2588 case EvqConstExpr: return temporaryRegister(operand); // Unevaluated constant expression 2589 case EvqAttribute: return attributeRegister(operand); 2590 case EvqVaryingIn: return varyingRegister(operand); 2591 case EvqVaryingOut: return varyingRegister(operand); 2592 case EvqVertexIn: return attributeRegister(operand); 2593 case EvqFragmentOut: return fragmentOutputRegister(operand); 2594 case EvqVertexOut: return varyingRegister(operand); 2595 case EvqFragmentIn: return varyingRegister(operand); 2596 case EvqInvariantVaryingIn: return varyingRegister(operand); 2597 case EvqInvariantVaryingOut: return varyingRegister(operand); 2598 case EvqSmooth: return varyingRegister(operand); 2599 case EvqFlat: return varyingRegister(operand); 2600 case EvqCentroidOut: return varyingRegister(operand); 2601 case EvqSmoothIn: return varyingRegister(operand); 2602 case EvqFlatIn: return varyingRegister(operand); 2603 case EvqCentroidIn: return varyingRegister(operand); 2604 case EvqUniform: return uniformRegister(operand); 2605 case EvqIn: return temporaryRegister(operand); 2606 case EvqOut: return temporaryRegister(operand); 2607 case EvqInOut: return temporaryRegister(operand); 2608 case EvqConstReadOnly: return temporaryRegister(operand); 2609 case EvqPosition: return varyingRegister(operand); 2610 case EvqPointSize: return varyingRegister(operand); 2611 case EvqInstanceID: vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex; 2612 case EvqVertexID: vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex; 2613 case EvqFragCoord: pixelShader->declareVPos(); return sw::Shader::VPosIndex; 2614 case EvqFrontFacing: pixelShader->declareVFace(); return sw::Shader::VFaceIndex; 2615 case EvqPointCoord: return varyingRegister(operand); 2616 case EvqFragColor: return 0; 2617 case EvqFragData: return fragmentOutputRegister(operand); 2618 case EvqFragDepth: return 0; 2619 default: UNREACHABLE(operand->getQualifier()); 2620 } 2621 2622 return 0; 2623 } 2624 writeMask(TIntermTyped * destination,int index)2625 int OutputASM::writeMask(TIntermTyped *destination, int index) 2626 { 2627 if(destination->getQualifier() == EvqPointSize) 2628 { 2629 return 0x2; // Point size stored in the y component 2630 } 2631 2632 return 0xF >> (4 - registerSize(destination->getType(), index)); 2633 } 2634 readSwizzle(TIntermTyped * argument,int size)2635 int OutputASM::readSwizzle(TIntermTyped *argument, int size) 2636 { 2637 if(argument->getQualifier() == EvqPointSize) 2638 { 2639 return 0x55; // Point size stored in the y component 2640 } 2641 2642 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4}; // (void), xxxx, xyyy, xyzz, xyzw 2643 2644 return swizzleSize[size]; 2645 } 2646 2647 // Conservatively checks whether an expression is fast to compute and has no side effects trivial(TIntermTyped * expression,int budget)2648 bool OutputASM::trivial(TIntermTyped *expression, int budget) 2649 { 2650 if(!expression->isRegister()) 2651 { 2652 return false; 2653 } 2654 2655 return cost(expression, budget) >= 0; 2656 } 2657 2658 // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects) cost(TIntermNode * expression,int budget)2659 int OutputASM::cost(TIntermNode *expression, int budget) 2660 { 2661 if(budget < 0) 2662 { 2663 return budget; 2664 } 2665 2666 if(expression->getAsSymbolNode()) 2667 { 2668 return budget; 2669 } 2670 else if(expression->getAsConstantUnion()) 2671 { 2672 return budget; 2673 } 2674 else if(expression->getAsBinaryNode()) 2675 { 2676 TIntermBinary *binary = expression->getAsBinaryNode(); 2677 2678 switch(binary->getOp()) 2679 { 2680 case EOpVectorSwizzle: 2681 case EOpIndexDirect: 2682 case EOpIndexDirectStruct: 2683 case EOpIndexDirectInterfaceBlock: 2684 return cost(binary->getLeft(), budget - 0); 2685 case EOpAdd: 2686 case EOpSub: 2687 case EOpMul: 2688 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1)); 2689 default: 2690 return -1; 2691 } 2692 } 2693 else if(expression->getAsUnaryNode()) 2694 { 2695 TIntermUnary *unary = expression->getAsUnaryNode(); 2696 2697 switch(unary->getOp()) 2698 { 2699 case EOpAbs: 2700 case EOpNegative: 2701 return cost(unary->getOperand(), budget - 1); 2702 default: 2703 return -1; 2704 } 2705 } 2706 else if(expression->getAsSelectionNode()) 2707 { 2708 TIntermSelection *selection = expression->getAsSelectionNode(); 2709 2710 if(selection->usesTernaryOperator()) 2711 { 2712 TIntermTyped *condition = selection->getCondition(); 2713 TIntermNode *trueBlock = selection->getTrueBlock(); 2714 TIntermNode *falseBlock = selection->getFalseBlock(); 2715 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion(); 2716 2717 if(constantCondition) 2718 { 2719 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst(); 2720 2721 if(trueCondition) 2722 { 2723 return cost(trueBlock, budget - 0); 2724 } 2725 else 2726 { 2727 return cost(falseBlock, budget - 0); 2728 } 2729 } 2730 else 2731 { 2732 return cost(trueBlock, cost(falseBlock, budget - 2)); 2733 } 2734 } 2735 } 2736 2737 return -1; 2738 } 2739 findFunction(const TString & name)2740 const Function *OutputASM::findFunction(const TString &name) 2741 { 2742 for(unsigned int f = 0; f < functionArray.size(); f++) 2743 { 2744 if(functionArray[f].name == name) 2745 { 2746 return &functionArray[f]; 2747 } 2748 } 2749 2750 return 0; 2751 } 2752 temporaryRegister(TIntermTyped * temporary)2753 int OutputASM::temporaryRegister(TIntermTyped *temporary) 2754 { 2755 return allocate(temporaries, temporary); 2756 } 2757 varyingRegister(TIntermTyped * varying)2758 int OutputASM::varyingRegister(TIntermTyped *varying) 2759 { 2760 int var = lookup(varyings, varying); 2761 2762 if(var == -1) 2763 { 2764 var = allocate(varyings, varying); 2765 int componentCount = varying->registerSize(); 2766 int registerCount = varying->totalRegisterCount(); 2767 2768 if(pixelShader) 2769 { 2770 if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS) 2771 { 2772 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader"); 2773 return 0; 2774 } 2775 2776 if(varying->getQualifier() == EvqPointCoord) 2777 { 2778 ASSERT(varying->isRegister()); 2779 pixelShader->setInput(var, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var)); 2780 } 2781 else 2782 { 2783 for(int i = 0; i < varying->totalRegisterCount(); i++) 2784 { 2785 bool flat = hasFlatQualifier(varying); 2786 2787 pixelShader->setInput(var + i, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat)); 2788 } 2789 } 2790 } 2791 else if(vertexShader) 2792 { 2793 if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS) 2794 { 2795 mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader"); 2796 return 0; 2797 } 2798 2799 if(varying->getQualifier() == EvqPosition) 2800 { 2801 ASSERT(varying->isRegister()); 2802 vertexShader->setPositionRegister(var); 2803 } 2804 else if(varying->getQualifier() == EvqPointSize) 2805 { 2806 ASSERT(varying->isRegister()); 2807 vertexShader->setPointSizeRegister(var); 2808 } 2809 else 2810 { 2811 // Semantic indexes for user varyings will be assigned during program link to match the pixel shader 2812 } 2813 } 2814 else UNREACHABLE(0); 2815 2816 declareVarying(varying, var); 2817 } 2818 2819 return var; 2820 } 2821 declareVarying(TIntermTyped * varying,int reg)2822 void OutputASM::declareVarying(TIntermTyped *varying, int reg) 2823 { 2824 if(varying->getQualifier() != EvqPointCoord) // gl_PointCoord does not need linking 2825 { 2826 const TType &type = varying->getType(); 2827 const char *name = varying->getAsSymbolNode()->getSymbol().c_str(); 2828 VaryingList &activeVaryings = shaderObject->varyings; 2829 2830 // Check if this varying has been declared before without having a register assigned 2831 for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++) 2832 { 2833 if(v->name == name) 2834 { 2835 if(reg >= 0) 2836 { 2837 ASSERT(v->reg < 0 || v->reg == reg); 2838 v->reg = reg; 2839 } 2840 2841 return; 2842 } 2843 } 2844 2845 activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0)); 2846 } 2847 } 2848 uniformRegister(TIntermTyped * uniform)2849 int OutputASM::uniformRegister(TIntermTyped *uniform) 2850 { 2851 const TType &type = uniform->getType(); 2852 ASSERT(!IsSampler(type.getBasicType())); 2853 TInterfaceBlock *block = type.getAsInterfaceBlock(); 2854 TIntermSymbol *symbol = uniform->getAsSymbolNode(); 2855 ASSERT(symbol || block); 2856 2857 if(symbol || block) 2858 { 2859 TInterfaceBlock* parentBlock = type.getInterfaceBlock(); 2860 bool isBlockMember = (!block && parentBlock); 2861 int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform); 2862 2863 if(index == -1 || isBlockMember) 2864 { 2865 if(index == -1) 2866 { 2867 index = allocate(uniforms, uniform); 2868 } 2869 2870 // Verify if the current uniform is a member of an already declared block 2871 const TString &name = symbol ? symbol->getSymbol() : block->name(); 2872 int blockMemberIndex = blockMemberLookup(type, name, index); 2873 if(blockMemberIndex == -1) 2874 { 2875 declareUniform(type, name, index); 2876 } 2877 else 2878 { 2879 index = blockMemberIndex; 2880 } 2881 } 2882 2883 return index; 2884 } 2885 2886 return 0; 2887 } 2888 attributeRegister(TIntermTyped * attribute)2889 int OutputASM::attributeRegister(TIntermTyped *attribute) 2890 { 2891 ASSERT(!attribute->isArray()); 2892 2893 int index = lookup(attributes, attribute); 2894 2895 if(index == -1) 2896 { 2897 TIntermSymbol *symbol = attribute->getAsSymbolNode(); 2898 ASSERT(symbol); 2899 2900 if(symbol) 2901 { 2902 index = allocate(attributes, attribute); 2903 const TType &type = attribute->getType(); 2904 int registerCount = attribute->totalRegisterCount(); 2905 sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT; 2906 switch(type.getBasicType()) 2907 { 2908 case EbtInt: 2909 attribType = sw::VertexShader::ATTRIBTYPE_INT; 2910 break; 2911 case EbtUInt: 2912 attribType = sw::VertexShader::ATTRIBTYPE_UINT; 2913 break; 2914 case EbtFloat: 2915 default: 2916 break; 2917 } 2918 2919 if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS) 2920 { 2921 for(int i = 0; i < registerCount; i++) 2922 { 2923 vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType); 2924 } 2925 } 2926 2927 ActiveAttributes &activeAttributes = shaderObject->activeAttributes; 2928 2929 const char *name = symbol->getSymbol().c_str(); 2930 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index)); 2931 } 2932 } 2933 2934 return index; 2935 } 2936 fragmentOutputRegister(TIntermTyped * fragmentOutput)2937 int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput) 2938 { 2939 return allocate(fragmentOutputs, fragmentOutput); 2940 } 2941 samplerRegister(TIntermTyped * sampler)2942 int OutputASM::samplerRegister(TIntermTyped *sampler) 2943 { 2944 const TType &type = sampler->getType(); 2945 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 2946 2947 TIntermSymbol *symbol = sampler->getAsSymbolNode(); 2948 TIntermBinary *binary = sampler->getAsBinaryNode(); 2949 2950 if(symbol) 2951 { 2952 switch(type.getQualifier()) 2953 { 2954 case EvqUniform: 2955 return samplerRegister(symbol); 2956 case EvqIn: 2957 case EvqConstReadOnly: 2958 // Function arguments are not (uniform) sampler registers 2959 return -1; 2960 default: 2961 UNREACHABLE(type.getQualifier()); 2962 } 2963 } 2964 else if(binary) 2965 { 2966 TIntermTyped *left = binary->getLeft(); 2967 TIntermTyped *right = binary->getRight(); 2968 const TType &leftType = left->getType(); 2969 int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0; 2970 int offset = 0; 2971 2972 switch(binary->getOp()) 2973 { 2974 case EOpIndexDirect: 2975 ASSERT(left->isArray()); 2976 offset = index * leftType.elementRegisterCount(); 2977 break; 2978 case EOpIndexDirectStruct: 2979 ASSERT(leftType.isStruct()); 2980 { 2981 const TFieldList &fields = leftType.getStruct()->fields(); 2982 2983 for(int i = 0; i < index; i++) 2984 { 2985 offset += fields[i]->type()->totalRegisterCount(); 2986 } 2987 } 2988 break; 2989 case EOpIndexIndirect: // Indirect indexing produces a temporary, not a sampler register 2990 return -1; 2991 case EOpIndexDirectInterfaceBlock: // Interface blocks can't contain samplers 2992 default: 2993 UNREACHABLE(binary->getOp()); 2994 return -1; 2995 } 2996 2997 int base = samplerRegister(left); 2998 2999 if(base < 0) 3000 { 3001 return -1; 3002 } 3003 3004 return base + offset; 3005 } 3006 3007 UNREACHABLE(0); 3008 return -1; // Not a (uniform) sampler register 3009 } 3010 samplerRegister(TIntermSymbol * sampler)3011 int OutputASM::samplerRegister(TIntermSymbol *sampler) 3012 { 3013 const TType &type = sampler->getType(); 3014 ASSERT(IsSampler(type.getBasicType()) || type.isStruct()); // Structures can contain samplers 3015 3016 int index = lookup(samplers, sampler); 3017 3018 if(index == -1) 3019 { 3020 index = allocate(samplers, sampler); 3021 3022 if(sampler->getQualifier() == EvqUniform) 3023 { 3024 const char *name = sampler->getSymbol().c_str(); 3025 declareUniform(type, name, index); 3026 } 3027 } 3028 3029 return index; 3030 } 3031 isSamplerRegister(TIntermTyped * operand)3032 bool OutputASM::isSamplerRegister(TIntermTyped *operand) 3033 { 3034 return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0; 3035 } 3036 lookup(VariableArray & list,TIntermTyped * variable)3037 int OutputASM::lookup(VariableArray &list, TIntermTyped *variable) 3038 { 3039 for(unsigned int i = 0; i < list.size(); i++) 3040 { 3041 if(list[i] == variable) 3042 { 3043 return i; // Pointer match 3044 } 3045 } 3046 3047 TIntermSymbol *varSymbol = variable->getAsSymbolNode(); 3048 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock(); 3049 3050 if(varBlock) 3051 { 3052 for(unsigned int i = 0; i < list.size(); i++) 3053 { 3054 if(list[i]) 3055 { 3056 TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock(); 3057 3058 if(listBlock) 3059 { 3060 if(listBlock->name() == varBlock->name()) 3061 { 3062 ASSERT(listBlock->arraySize() == varBlock->arraySize()); 3063 ASSERT(listBlock->fields() == varBlock->fields()); 3064 ASSERT(listBlock->blockStorage() == varBlock->blockStorage()); 3065 ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking()); 3066 3067 return i; 3068 } 3069 } 3070 } 3071 } 3072 } 3073 else if(varSymbol) 3074 { 3075 for(unsigned int i = 0; i < list.size(); i++) 3076 { 3077 if(list[i]) 3078 { 3079 TIntermSymbol *listSymbol = list[i]->getAsSymbolNode(); 3080 3081 if(listSymbol) 3082 { 3083 if(listSymbol->getId() == varSymbol->getId()) 3084 { 3085 ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol()); 3086 ASSERT(listSymbol->getType() == varSymbol->getType()); 3087 ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier()); 3088 3089 return i; 3090 } 3091 } 3092 } 3093 } 3094 } 3095 3096 return -1; 3097 } 3098 lookup(VariableArray & list,TInterfaceBlock * block)3099 int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block) 3100 { 3101 for(unsigned int i = 0; i < list.size(); i++) 3102 { 3103 if(list[i] && (list[i]->getType().getInterfaceBlock() == block)) 3104 { 3105 return i; // Pointer match 3106 } 3107 } 3108 return -1; 3109 } 3110 allocate(VariableArray & list,TIntermTyped * variable)3111 int OutputASM::allocate(VariableArray &list, TIntermTyped *variable) 3112 { 3113 int index = lookup(list, variable); 3114 3115 if(index == -1) 3116 { 3117 unsigned int registerCount = variable->blockRegisterCount(); 3118 3119 for(unsigned int i = 0; i < list.size(); i++) 3120 { 3121 if(list[i] == 0) 3122 { 3123 unsigned int j = 1; 3124 for( ; j < registerCount && (i + j) < list.size(); j++) 3125 { 3126 if(list[i + j] != 0) 3127 { 3128 break; 3129 } 3130 } 3131 3132 if(j == registerCount) // Found free slots 3133 { 3134 for(unsigned int j = 0; j < registerCount; j++) 3135 { 3136 list[i + j] = variable; 3137 } 3138 3139 return i; 3140 } 3141 } 3142 } 3143 3144 index = list.size(); 3145 3146 for(unsigned int i = 0; i < registerCount; i++) 3147 { 3148 list.push_back(variable); 3149 } 3150 } 3151 3152 return index; 3153 } 3154 free(VariableArray & list,TIntermTyped * variable)3155 void OutputASM::free(VariableArray &list, TIntermTyped *variable) 3156 { 3157 int index = lookup(list, variable); 3158 3159 if(index >= 0) 3160 { 3161 list[index] = 0; 3162 } 3163 } 3164 blockMemberLookup(const TType & type,const TString & name,int registerIndex)3165 int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex) 3166 { 3167 const TInterfaceBlock *block = type.getInterfaceBlock(); 3168 3169 if(block) 3170 { 3171 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3172 const TFieldList& fields = block->fields(); 3173 const TString &blockName = block->name(); 3174 int fieldRegisterIndex = registerIndex; 3175 3176 if(!type.isInterfaceBlock()) 3177 { 3178 // This is a uniform that's part of a block, let's see if the block is already defined 3179 for(size_t i = 0; i < activeUniformBlocks.size(); ++i) 3180 { 3181 if(activeUniformBlocks[i].name == blockName.c_str()) 3182 { 3183 // The block is already defined, find the register for the current uniform and return it 3184 for(size_t j = 0; j < fields.size(); j++) 3185 { 3186 const TString &fieldName = fields[j]->name(); 3187 if(fieldName == name) 3188 { 3189 return fieldRegisterIndex; 3190 } 3191 3192 fieldRegisterIndex += fields[j]->type()->totalRegisterCount(); 3193 } 3194 3195 ASSERT(false); 3196 return fieldRegisterIndex; 3197 } 3198 } 3199 } 3200 } 3201 3202 return -1; 3203 } 3204 declareUniform(const TType & type,const TString & name,int registerIndex,int blockId,BlockLayoutEncoder * encoder)3205 void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder) 3206 { 3207 const TStructure *structure = type.getStruct(); 3208 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr; 3209 3210 if(!structure && !block) 3211 { 3212 ActiveUniforms &activeUniforms = shaderObject->activeUniforms; 3213 const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo(); 3214 if(blockId >= 0) 3215 { 3216 blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type); 3217 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size()); 3218 } 3219 int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex; 3220 activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(), 3221 fieldRegisterIndex, blockId, blockInfo)); 3222 if(IsSampler(type.getBasicType())) 3223 { 3224 for(int i = 0; i < type.totalRegisterCount(); i++) 3225 { 3226 shader->declareSampler(fieldRegisterIndex + i); 3227 } 3228 } 3229 } 3230 else if(block) 3231 { 3232 ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks; 3233 const TFieldList& fields = block->fields(); 3234 const TString &blockName = block->name(); 3235 int fieldRegisterIndex = registerIndex; 3236 bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1); 3237 3238 blockId = activeUniformBlocks.size(); 3239 bool isRowMajor = block->matrixPacking() == EmpRowMajor; 3240 activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(), 3241 block->blockStorage(), isRowMajor, registerIndex, blockId)); 3242 blockDefinitions.push_back(BlockDefinitionIndexMap()); 3243 3244 Std140BlockEncoder currentBlockEncoder(isRowMajor); 3245 currentBlockEncoder.enterAggregateType(); 3246 for(size_t i = 0; i < fields.size(); i++) 3247 { 3248 const TType &fieldType = *(fields[i]->type()); 3249 const TString &fieldName = fields[i]->name(); 3250 if(isUniformBlockMember && (fieldName == name)) 3251 { 3252 registerIndex = fieldRegisterIndex; 3253 } 3254 3255 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName; 3256 3257 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, ¤tBlockEncoder); 3258 fieldRegisterIndex += fieldType.totalRegisterCount(); 3259 } 3260 currentBlockEncoder.exitAggregateType(); 3261 activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize(); 3262 } 3263 else 3264 { 3265 int fieldRegisterIndex = registerIndex; 3266 3267 const TFieldList& fields = structure->fields(); 3268 if(type.isArray() && (structure || type.isInterfaceBlock())) 3269 { 3270 for(int i = 0; i < type.getArraySize(); i++) 3271 { 3272 if(encoder) 3273 { 3274 encoder->enterAggregateType(); 3275 } 3276 for(size_t j = 0; j < fields.size(); j++) 3277 { 3278 const TType &fieldType = *(fields[j]->type()); 3279 const TString &fieldName = fields[j]->name(); 3280 const TString uniformName = name + "[" + str(i) + "]." + fieldName; 3281 3282 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder); 3283 fieldRegisterIndex += fieldType.totalRegisterCount(); 3284 } 3285 if(encoder) 3286 { 3287 encoder->exitAggregateType(); 3288 } 3289 } 3290 } 3291 else 3292 { 3293 if(encoder) 3294 { 3295 encoder->enterAggregateType(); 3296 } 3297 for(size_t i = 0; i < fields.size(); i++) 3298 { 3299 const TType &fieldType = *(fields[i]->type()); 3300 const TString &fieldName = fields[i]->name(); 3301 const TString uniformName = name + "." + fieldName; 3302 3303 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder); 3304 fieldRegisterIndex += fieldType.totalRegisterCount(); 3305 } 3306 if(encoder) 3307 { 3308 encoder->exitAggregateType(); 3309 } 3310 } 3311 } 3312 } 3313 glVariableType(const TType & type)3314 GLenum OutputASM::glVariableType(const TType &type) 3315 { 3316 switch(type.getBasicType()) 3317 { 3318 case EbtFloat: 3319 if(type.isScalar()) 3320 { 3321 return GL_FLOAT; 3322 } 3323 else if(type.isVector()) 3324 { 3325 switch(type.getNominalSize()) 3326 { 3327 case 2: return GL_FLOAT_VEC2; 3328 case 3: return GL_FLOAT_VEC3; 3329 case 4: return GL_FLOAT_VEC4; 3330 default: UNREACHABLE(type.getNominalSize()); 3331 } 3332 } 3333 else if(type.isMatrix()) 3334 { 3335 switch(type.getNominalSize()) 3336 { 3337 case 2: 3338 switch(type.getSecondarySize()) 3339 { 3340 case 2: return GL_FLOAT_MAT2; 3341 case 3: return GL_FLOAT_MAT2x3; 3342 case 4: return GL_FLOAT_MAT2x4; 3343 default: UNREACHABLE(type.getSecondarySize()); 3344 } 3345 case 3: 3346 switch(type.getSecondarySize()) 3347 { 3348 case 2: return GL_FLOAT_MAT3x2; 3349 case 3: return GL_FLOAT_MAT3; 3350 case 4: return GL_FLOAT_MAT3x4; 3351 default: UNREACHABLE(type.getSecondarySize()); 3352 } 3353 case 4: 3354 switch(type.getSecondarySize()) 3355 { 3356 case 2: return GL_FLOAT_MAT4x2; 3357 case 3: return GL_FLOAT_MAT4x3; 3358 case 4: return GL_FLOAT_MAT4; 3359 default: UNREACHABLE(type.getSecondarySize()); 3360 } 3361 default: UNREACHABLE(type.getNominalSize()); 3362 } 3363 } 3364 else UNREACHABLE(0); 3365 break; 3366 case EbtInt: 3367 if(type.isScalar()) 3368 { 3369 return GL_INT; 3370 } 3371 else if(type.isVector()) 3372 { 3373 switch(type.getNominalSize()) 3374 { 3375 case 2: return GL_INT_VEC2; 3376 case 3: return GL_INT_VEC3; 3377 case 4: return GL_INT_VEC4; 3378 default: UNREACHABLE(type.getNominalSize()); 3379 } 3380 } 3381 else UNREACHABLE(0); 3382 break; 3383 case EbtUInt: 3384 if(type.isScalar()) 3385 { 3386 return GL_UNSIGNED_INT; 3387 } 3388 else if(type.isVector()) 3389 { 3390 switch(type.getNominalSize()) 3391 { 3392 case 2: return GL_UNSIGNED_INT_VEC2; 3393 case 3: return GL_UNSIGNED_INT_VEC3; 3394 case 4: return GL_UNSIGNED_INT_VEC4; 3395 default: UNREACHABLE(type.getNominalSize()); 3396 } 3397 } 3398 else UNREACHABLE(0); 3399 break; 3400 case EbtBool: 3401 if(type.isScalar()) 3402 { 3403 return GL_BOOL; 3404 } 3405 else if(type.isVector()) 3406 { 3407 switch(type.getNominalSize()) 3408 { 3409 case 2: return GL_BOOL_VEC2; 3410 case 3: return GL_BOOL_VEC3; 3411 case 4: return GL_BOOL_VEC4; 3412 default: UNREACHABLE(type.getNominalSize()); 3413 } 3414 } 3415 else UNREACHABLE(0); 3416 break; 3417 case EbtSampler2D: 3418 return GL_SAMPLER_2D; 3419 case EbtISampler2D: 3420 return GL_INT_SAMPLER_2D; 3421 case EbtUSampler2D: 3422 return GL_UNSIGNED_INT_SAMPLER_2D; 3423 case EbtSamplerCube: 3424 return GL_SAMPLER_CUBE; 3425 case EbtISamplerCube: 3426 return GL_INT_SAMPLER_CUBE; 3427 case EbtUSamplerCube: 3428 return GL_UNSIGNED_INT_SAMPLER_CUBE; 3429 case EbtSamplerExternalOES: 3430 return GL_SAMPLER_EXTERNAL_OES; 3431 case EbtSampler3D: 3432 return GL_SAMPLER_3D_OES; 3433 case EbtISampler3D: 3434 return GL_INT_SAMPLER_3D; 3435 case EbtUSampler3D: 3436 return GL_UNSIGNED_INT_SAMPLER_3D; 3437 case EbtSampler2DArray: 3438 return GL_SAMPLER_2D_ARRAY; 3439 case EbtISampler2DArray: 3440 return GL_INT_SAMPLER_2D_ARRAY; 3441 case EbtUSampler2DArray: 3442 return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY; 3443 case EbtSampler2DShadow: 3444 return GL_SAMPLER_2D_SHADOW; 3445 case EbtSamplerCubeShadow: 3446 return GL_SAMPLER_CUBE_SHADOW; 3447 case EbtSampler2DArrayShadow: 3448 return GL_SAMPLER_2D_ARRAY_SHADOW; 3449 default: 3450 UNREACHABLE(type.getBasicType()); 3451 break; 3452 } 3453 3454 return GL_NONE; 3455 } 3456 glVariablePrecision(const TType & type)3457 GLenum OutputASM::glVariablePrecision(const TType &type) 3458 { 3459 if(type.getBasicType() == EbtFloat) 3460 { 3461 switch(type.getPrecision()) 3462 { 3463 case EbpHigh: return GL_HIGH_FLOAT; 3464 case EbpMedium: return GL_MEDIUM_FLOAT; 3465 case EbpLow: return GL_LOW_FLOAT; 3466 case EbpUndefined: 3467 // Should be defined as the default precision by the parser 3468 default: UNREACHABLE(type.getPrecision()); 3469 } 3470 } 3471 else if(type.getBasicType() == EbtInt) 3472 { 3473 switch(type.getPrecision()) 3474 { 3475 case EbpHigh: return GL_HIGH_INT; 3476 case EbpMedium: return GL_MEDIUM_INT; 3477 case EbpLow: return GL_LOW_INT; 3478 case EbpUndefined: 3479 // Should be defined as the default precision by the parser 3480 default: UNREACHABLE(type.getPrecision()); 3481 } 3482 } 3483 3484 // Other types (boolean, sampler) don't have a precision 3485 return GL_NONE; 3486 } 3487 dim(TIntermNode * v)3488 int OutputASM::dim(TIntermNode *v) 3489 { 3490 TIntermTyped *vector = v->getAsTyped(); 3491 ASSERT(vector && vector->isRegister()); 3492 return vector->getNominalSize(); 3493 } 3494 dim2(TIntermNode * m)3495 int OutputASM::dim2(TIntermNode *m) 3496 { 3497 TIntermTyped *matrix = m->getAsTyped(); 3498 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray()); 3499 return matrix->getSecondarySize(); 3500 } 3501 3502 // Returns ~0u if no loop count could be determined loopCount(TIntermLoop * node)3503 unsigned int OutputASM::loopCount(TIntermLoop *node) 3504 { 3505 // Parse loops of the form: 3506 // for(int index = initial; index [comparator] limit; index += increment) 3507 TIntermSymbol *index = 0; 3508 TOperator comparator = EOpNull; 3509 int initial = 0; 3510 int limit = 0; 3511 int increment = 0; 3512 3513 // Parse index name and intial value 3514 if(node->getInit()) 3515 { 3516 TIntermAggregate *init = node->getInit()->getAsAggregate(); 3517 3518 if(init) 3519 { 3520 TIntermSequence &sequence = init->getSequence(); 3521 TIntermTyped *variable = sequence[0]->getAsTyped(); 3522 3523 if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt) 3524 { 3525 TIntermBinary *assign = variable->getAsBinaryNode(); 3526 3527 if(assign && assign->getOp() == EOpInitialize) 3528 { 3529 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode(); 3530 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion(); 3531 3532 if(symbol && constant) 3533 { 3534 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3535 { 3536 index = symbol; 3537 initial = constant->getUnionArrayPointer()[0].getIConst(); 3538 } 3539 } 3540 } 3541 } 3542 } 3543 } 3544 3545 // Parse comparator and limit value 3546 if(index && node->getCondition()) 3547 { 3548 TIntermBinary *test = node->getCondition()->getAsBinaryNode(); 3549 TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr; 3550 3551 if(left && (left->getId() == index->getId())) 3552 { 3553 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion(); 3554 3555 if(constant) 3556 { 3557 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3558 { 3559 comparator = test->getOp(); 3560 limit = constant->getUnionArrayPointer()[0].getIConst(); 3561 } 3562 } 3563 } 3564 } 3565 3566 // Parse increment 3567 if(index && comparator != EOpNull && node->getExpression()) 3568 { 3569 TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode(); 3570 TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode(); 3571 3572 if(binaryTerminal) 3573 { 3574 TOperator op = binaryTerminal->getOp(); 3575 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion(); 3576 3577 if(constant) 3578 { 3579 if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1) 3580 { 3581 int value = constant->getUnionArrayPointer()[0].getIConst(); 3582 3583 switch(op) 3584 { 3585 case EOpAddAssign: increment = value; break; 3586 case EOpSubAssign: increment = -value; break; 3587 default: UNIMPLEMENTED(); 3588 } 3589 } 3590 } 3591 } 3592 else if(unaryTerminal) 3593 { 3594 TOperator op = unaryTerminal->getOp(); 3595 3596 switch(op) 3597 { 3598 case EOpPostIncrement: increment = 1; break; 3599 case EOpPostDecrement: increment = -1; break; 3600 case EOpPreIncrement: increment = 1; break; 3601 case EOpPreDecrement: increment = -1; break; 3602 default: UNIMPLEMENTED(); 3603 } 3604 } 3605 } 3606 3607 if(index && comparator != EOpNull && increment != 0) 3608 { 3609 if(comparator == EOpLessThanEqual) 3610 { 3611 comparator = EOpLessThan; 3612 limit += 1; 3613 } 3614 else if(comparator == EOpGreaterThanEqual) 3615 { 3616 comparator = EOpLessThan; 3617 limit -= 1; 3618 std::swap(initial, limit); 3619 increment = -increment; 3620 } 3621 else if(comparator == EOpGreaterThan) 3622 { 3623 comparator = EOpLessThan; 3624 std::swap(initial, limit); 3625 increment = -increment; 3626 } 3627 3628 if(comparator == EOpLessThan) 3629 { 3630 if(!(initial < limit)) // Never loops 3631 { 3632 return 0; 3633 } 3634 3635 int iterations = (limit - initial + abs(increment) - 1) / increment; // Ceiling division 3636 3637 if(iterations < 0) 3638 { 3639 return ~0u; 3640 } 3641 3642 return iterations; 3643 } 3644 else UNIMPLEMENTED(); // Falls through 3645 } 3646 3647 return ~0u; 3648 } 3649 traverse(TIntermNode * node)3650 bool LoopUnrollable::traverse(TIntermNode *node) 3651 { 3652 loopDepth = 0; 3653 loopUnrollable = true; 3654 3655 node->traverse(this); 3656 3657 return loopUnrollable; 3658 } 3659 visitLoop(Visit visit,TIntermLoop * loop)3660 bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop) 3661 { 3662 if(visit == PreVisit) 3663 { 3664 loopDepth++; 3665 } 3666 else if(visit == PostVisit) 3667 { 3668 loopDepth++; 3669 } 3670 3671 return true; 3672 } 3673 visitBranch(Visit visit,TIntermBranch * node)3674 bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node) 3675 { 3676 if(!loopUnrollable) 3677 { 3678 return false; 3679 } 3680 3681 if(!loopDepth) 3682 { 3683 return true; 3684 } 3685 3686 switch(node->getFlowOp()) 3687 { 3688 case EOpKill: 3689 case EOpReturn: 3690 break; 3691 case EOpBreak: 3692 case EOpContinue: 3693 loopUnrollable = false; 3694 break; 3695 default: UNREACHABLE(node->getFlowOp()); 3696 } 3697 3698 return loopUnrollable; 3699 } 3700 visitAggregate(Visit visit,TIntermAggregate * node)3701 bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node) 3702 { 3703 return loopUnrollable; 3704 } 3705 } 3706