1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_SpirvShader_hpp 16 #define sw_SpirvShader_hpp 17 18 #include "SamplerCore.hpp" 19 #include "ShaderCore.hpp" 20 #include "SpirvID.hpp" 21 #include "Device/Config.hpp" 22 #include "Device/Sampler.hpp" 23 #include "System/Debug.hpp" 24 #include "System/Math.hpp" 25 #include "System/Types.hpp" 26 #include "Vulkan/VkConfig.hpp" 27 #include "Vulkan/VkDescriptorSet.hpp" 28 29 #define SPV_ENABLE_UTILITY_CODE 30 #include <spirv/unified1/spirv.hpp> 31 32 #include <array> 33 #include <atomic> 34 #include <cstdint> 35 #include <cstring> 36 #include <deque> 37 #include <functional> 38 #include <memory> 39 #include <string> 40 #include <type_traits> 41 #include <unordered_map> 42 #include <unordered_set> 43 #include <vector> 44 45 #undef Yield // b/127920555 46 47 namespace vk { 48 49 class PipelineLayout; 50 class ImageView; 51 class Sampler; 52 class RenderPass; 53 struct SampledImageDescriptor; 54 55 namespace dbg { 56 class Context; 57 } // namespace dbg 58 59 } // namespace vk 60 61 namespace sw { 62 63 // Forward declarations. 
// Forward declarations.
class SpirvRoutine;

// Incrementally constructed complex bundle of rvalues
// Effectively a restricted vector, supporting only:
// - allocation to a (runtime-known) fixed component count
// - in-place construction of elements
// - const operator[]
class Intermediate
{
public:
	Intermediate(uint32_t componentCount)
	    : componentCount(componentCount)
	    , scalar(new rr::Value *[componentCount])
	{
		// Mark every component as not-yet-constructed; emplace() asserts on this.
		for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; }
	}

	~Intermediate()
	{
		delete[] scalar;
	}

	// TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to
	// decide the format used to print the intermediate data.
	enum class TypeHint
	{
		Float,
		Int,
		UInt
	};

	// In-place construction of a component from an rvalue.
	void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); }
	void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); }
	void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); }

	// In-place construction of a component from an lvalue reference.
	void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); }
	void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); }
	void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); }

	// Value retrieval functions.
	// Each reinterprets the stored rr::Value as the requested SIMD type;
	// the component must have been constructed first (asserted below).
	RValue<SIMD::Float> Float(uint32_t i) const
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] != nullptr);
		return As<SIMD::Float>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Float>(scalar)
	}

	RValue<SIMD::Int> Int(uint32_t i) const
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] != nullptr);
		return As<SIMD::Int>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Int>(scalar)
	}

	RValue<SIMD::UInt> UInt(uint32_t i) const
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] != nullptr);
		return As<SIMD::UInt>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
	}

	// No copy/move construction or assignment
	Intermediate(Intermediate const &) = delete;
	Intermediate(Intermediate &&) = delete;
	Intermediate &operator=(Intermediate const &) = delete;
	Intermediate &operator=(Intermediate &&) = delete;

	const uint32_t componentCount;

private:
	// Stores value into component i. Each component may be written exactly
	// once (asserted). Also records the type hint used for debug printing.
	void emplace(uint32_t i, rr::Value *value, TypeHint type)
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] == nullptr);
		scalar[i] = value;
		RR_PRINT_ONLY(typeHint = type;)
	}

	rr::Value **const scalar;

#ifdef ENABLE_RR_PRINT
	friend struct rr::PrintValue::Ty<sw::Intermediate>;
	TypeHint typeHint = TypeHint::Float;
#endif  // ENABLE_RR_PRINT
};

class SpirvShader
{
public:
	using InsnStore = std::vector<uint32_t>;
	InsnStore insns;

	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);

	enum class YieldResult
	{
		ControlBarrier,
	};

	class Type;
	class Object;

	// Pseudo-iterator over SPIRV instructions, designed to support range-based-for.
	class InsnIterator
	{
	public:
		InsnIterator(InsnIterator const &other) = default;

		InsnIterator() = default;

		explicit InsnIterator(InsnStore::const_iterator iter)
		    : iter{ iter }
		{
		}

		// Opcode of the current instruction (low bits of its first word).
		spv::Op opcode() const
		{
			return static_cast<spv::Op>(*iter & spv::OpCodeMask);
		}

		// Total word count of the current instruction (high bits of its first word).
		uint32_t wordCount() const
		{
			return *iter >> spv::WordCountShift;
		}

		uint32_t word(uint32_t n) const
		{
			ASSERT(n < wordCount());
			return iter[n];
		}

		uint32_t const *wordPointer(uint32_t n) const
		{
			ASSERT(n < wordCount());
			return &iter[n];
		}

		// Reinterprets the words starting at operand n as a string literal.
		const char *string(uint32_t n) const
		{
			return reinterpret_cast<const char *>(wordPointer(n));
		}

		bool hasResultAndType() const
		{
			bool hasResult = false, hasResultType = false;
			spv::HasResultAndType(opcode(), &hasResult, &hasResultType);

			return hasResultType;
		}

		SpirvID<Type> resultTypeId() const
		{
			ASSERT(hasResultAndType());
			return word(1);
		}

		SpirvID<Object> resultId() const
		{
			ASSERT(hasResultAndType());
			return word(2);
		}

		bool operator==(InsnIterator const &other) const
		{
			return iter == other.iter;
		}

		bool operator!=(InsnIterator const &other) const
		{
			return iter != other.iter;
		}

		InsnIterator operator*() const
		{
			return *this;
		}

		// Advances by the instruction's word count, i.e. to the next instruction.
		InsnIterator &operator++()
		{
			iter += wordCount();
			return *this;
		}

		InsnIterator const operator++(int)
		{
			InsnIterator ret{ *this };
			iter += wordCount();
			return ret;
		}

	private:
		InsnStore::const_iterator iter;
	};

	/* range-based-for interface */
	InsnIterator begin() const
	{
		// Skip the five-word SPIR-V module header preceding the instruction stream.
		return InsnIterator{ insns.cbegin() + 5 };
	}

	InsnIterator end() const
	{
		return InsnIterator{ insns.cend() };
	}

	class Type
	{
	public:
		using ID = SpirvID<Type>;

		spv::Op opcode() const { return definition.opcode(); }

		InsnIterator definition;
		spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
		uint32_t componentCount = 0;
		bool isBuiltInBlock = false;

		// Inner element type for pointers, arrays, vectors and matrices.
		ID element;
	};

	class Object
	{
	public:
		using ID = SpirvID<Object>;

		spv::Op opcode() const { return definition.opcode(); }
		Type::ID typeId() const { return definition.resultTypeId(); }
		Object::ID id() const { return definition.resultId(); }

		InsnIterator definition;
		std::vector<uint32_t> constantValue;

		enum class Kind
		{
			// Invalid default kind.
			// If we get left with an object in this state, the module was
			// broken.
			Unknown,

			// TODO: Better document this kind.
			// A shader interface variable pointer.
			// Pointer with uniform address across all lanes.
			// Pointer held by SpirvRoutine::pointers
			InterfaceVariable,

			// Constant value held by Object::constantValue.
			Constant,

			// Value held by SpirvRoutine::intermediates.
			Intermediate,

			// Pointer held by SpirvRoutine::pointers
			Pointer,

			// A pointer to a vk::DescriptorSet*.
			// Pointer held by SpirvRoutine::pointers.
			DescriptorSet,
		};

		Kind kind = Kind::Unknown;
	};

	// Block is an interval of SPIR-V instructions, starting with the
	// opening OpLabel, and ending with a termination instruction.
	class Block
	{
	public:
		using ID = SpirvID<Block>;
		using Set = std::unordered_set<ID>;

		// Edge represents the graph edge between two blocks.
		struct Edge
		{
			ID from;
			ID to;

			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }

			struct Hash
			{
				std::size_t operator()(const Edge &edge) const noexcept
				{
					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
				}
			};
		};

		Block() = default;
		Block(const Block &other) = default;
		explicit Block(InsnIterator begin, InsnIterator end);

		/* range-based-for interface */
		inline InsnIterator begin() const { return begin_; }
		inline InsnIterator end() const { return end_; }

		enum Kind
		{
			Simple,                         // OpBranch or other simple terminator.
			StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
			UnstructuredBranchConditional,  // OpBranchConditional
			StructuredSwitch,               // OpSelectionMerge + OpSwitch
			UnstructuredSwitch,             // OpSwitch
			Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
		};

		Kind kind = Simple;
		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
		InsnIterator branchInstruction;  // Branch instruction.
		ID mergeBlock;                   // Structured flow merge block.
		ID continueTarget;               // Loop continue block.
		Set ins;                         // Blocks that branch into this block.
		Set outs;                        // Blocks that this block branches to.
		bool isLoopMerge = false;

	private:
		InsnIterator begin_;
		InsnIterator end_;
	};

	class Function
	{
	public:
		using ID = SpirvID<Function>;

		// Walks all the reachable blocks starting from id, adding them to
		// reachable.
		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;

		// AssignBlockFields() performs the following for all reachable blocks:
		// * Assigns Block::ins with the identifiers of all blocks that contain
		//   this block in their Block::outs.
		// * Sets Block::isLoopMerge to true if the block is the merge of
		//   another loop block.
		void AssignBlockFields();

		// ForeachBlockDependency calls f with each dependency of the given
		// block. A dependency is an incoming block that is not a loop-back
		// edge.
		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;

		// ExistsPath returns true if there's a direct or indirect flow from
		// the 'from' block to the 'to' block that does not pass through
		// notPassingThrough.
		bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;

		Block const &getBlock(Block::ID id) const
		{
			auto it = blocks.find(id);
			ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
			return it->second;
		}

		Block::ID entry;          // function entry point block.
		HandleMap<Block> blocks;  // blocks belonging to this function.
		Type::ID type;            // type of the function.
		Type::ID result;          // return type.
	};

	using String = std::string;
	using StringID = SpirvID<std::string>;

	class Extension
	{
	public:
		using ID = SpirvID<Extension>;

		enum Name
		{
			Unknown,
			GLSLstd450,
			OpenCLDebugInfo100
		};

		Name name;
	};

	struct TypeOrObject
	{};

	// TypeOrObjectID is an identifier that represents a Type or an Object,
	// and supports implicit casting to and from Type::ID or Object::ID.
	class TypeOrObjectID : public SpirvID<TypeOrObject>
	{
	public:
		using Hash = std::hash<SpirvID<TypeOrObject>>;

		inline TypeOrObjectID(uint32_t id)
		    : SpirvID(id)
		{}
		inline TypeOrObjectID(Type::ID id)
		    : SpirvID(id.value())
		{}
		inline TypeOrObjectID(Object::ID id)
		    : SpirvID(id.value())
		{}
		inline operator Type::ID() const { return Type::ID(value()); }
		inline operator Object::ID() const { return Object::ID(value()); }
	};

	// OpImageSample variants
	enum Variant
	{
		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
		Dref,
		Proj,
		ProjDref,
		VARIANT_LAST = ProjDref
	};

	// Compact representation of image instruction parameters that is passed to the
	// trampoline function for retrieving/generating the corresponding sampling routine.
	struct ImageInstruction
	{
		ImageInstruction(Variant variant, SamplerMethod samplerMethod)
		    : parameters(0)
		{
			this->variant = variant;
			this->samplerMethod = samplerMethod;
		}

		// Unmarshal from raw 32-bit data
		ImageInstruction(uint32_t parameters)
		    : parameters(parameters)
		{}

		SamplerFunction getSamplerFunction() const
		{
			return { static_cast<SamplerMethod>(samplerMethod), offset != 0, sample != 0 };
		}

		bool isDref() const
		{
			return (variant == Dref) || (variant == ProjDref);
		}

		bool isProj() const
		{
			return (variant == Proj) || (variant == ProjDref);
		}

		// The bit-field struct and 'parameters' alias the same 32 bits,
		// allowing the whole instruction description to be marshalled as a
		// single word (see the static_assert below).
		union
		{
			struct
			{
				uint32_t variant : BITS(VARIANT_LAST);
				uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST);
				uint32_t gatherComponent : 2;

				// Parameters are passed to the sampling routine in this order:
				uint32_t coordinates : 3;       // 1-4 (does not contain projection component)
				/*	uint32_t dref : 1; */       // Indicated by Variant::ProjDref|Dref
				/*	uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
				uint32_t grad : 2;              // 0-3 components (for each of dx / dy)
				uint32_t offset : 2;            // 0-3 components
				uint32_t sample : 1;            // 0-1 scalar integer
			};

			uint32_t parameters;
		};
	};

	static_assert(sizeof(ImageInstruction) == sizeof(uint32_t), "ImageInstruction must be 32-bit");

	// This method is for retrieving an ID that uniquely identifies the
	// shader entry point represented by this object.
	uint64_t getSerialID() const
	{
		return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
	}

	SpirvShader(uint32_t codeSerialID,
	            VkShaderStageFlagBits stage,
	            const char *entryPointName,
	            InsnStore const &insns,
	            const vk::RenderPass *renderPass,
	            uint32_t subpassIndex,
	            bool robustBufferAccess,
	            const std::shared_ptr<vk::dbg::Context> &dbgctx);

	~SpirvShader();

	struct Modes
	{
		bool EarlyFragmentTests : 1;
		bool DepthReplacing : 1;
		bool DepthGreater : 1;
		bool DepthLess : 1;
		bool DepthUnchanged : 1;
		bool ContainsKill : 1;
		bool ContainsControlBarriers : 1;
		bool NeedsCentroid : 1;
		bool ContainsSampleQualifier : 1;

		// Compute workgroup dimensions
		int WorkgroupSizeX = 1;
		int WorkgroupSizeY = 1;
		int WorkgroupSizeZ = 1;
	};

	Modes const &getModes() const
	{
		return modes;
	}

	struct Capabilities
	{
		bool Matrix : 1;
		bool Shader : 1;
		bool StorageImageMultisample : 1;
		bool ClipDistance : 1;
		bool CullDistance : 1;
		bool ImageCubeArray : 1;
		bool SampleRateShading : 1;
		bool InputAttachment : 1;
		bool Sampled1D : 1;
		bool Image1D : 1;
		bool SampledBuffer : 1;
		bool SampledCubeArray : 1;
		bool ImageBuffer : 1;
		bool ImageMSArray : 1;
		bool StorageImageExtendedFormats : 1;
		bool ImageQuery : 1;
		bool DerivativeControl : 1;
		bool InterpolationFunction : 1;
		bool GroupNonUniform : 1;
		bool GroupNonUniformVote : 1;
		bool GroupNonUniformBallot : 1;
		bool GroupNonUniformShuffle : 1;
		bool GroupNonUniformShuffleRelative : 1;
		bool GroupNonUniformArithmetic : 1;
		bool DeviceGroup : 1;
		bool MultiView : 1;
		bool StencilExportEXT : 1;
	};

	Capabilities const &getUsedCapabilities() const
	{
		return capabilities;
	}

	// getNumOutputClipDistances() returns the number of ClipDistances
	// outputted by this shader.
	unsigned int getNumOutputClipDistances() const
	{
		if(getUsedCapabilities().ClipDistance)
		{
			auto it = outputBuiltins.find(spv::BuiltInClipDistance);
			if(it != outputBuiltins.end())
			{
				return it->second.SizeInComponents;
			}
		}
		return 0;
	}

	// getNumOutputCullDistances() returns the number of CullDistances
	// outputted by this shader.
getNumOutputCullDistances() const622 unsigned int getNumOutputCullDistances() const 623 { 624 if(getUsedCapabilities().CullDistance) 625 { 626 auto it = outputBuiltins.find(spv::BuiltInCullDistance); 627 if(it != outputBuiltins.end()) 628 { 629 return it->second.SizeInComponents; 630 } 631 } 632 return 0; 633 } 634 635 enum AttribType : unsigned char 636 { 637 ATTRIBTYPE_FLOAT, 638 ATTRIBTYPE_INT, 639 ATTRIBTYPE_UINT, 640 ATTRIBTYPE_UNUSED, 641 642 ATTRIBTYPE_LAST = ATTRIBTYPE_UINT 643 }; 644 hasBuiltinInput(spv::BuiltIn b) const645 bool hasBuiltinInput(spv::BuiltIn b) const 646 { 647 return inputBuiltins.find(b) != inputBuiltins.end(); 648 } 649 hasBuiltinOutput(spv::BuiltIn b) const650 bool hasBuiltinOutput(spv::BuiltIn b) const 651 { 652 return outputBuiltins.find(b) != outputBuiltins.end(); 653 } 654 655 struct Decorations 656 { 657 int32_t Location = -1; 658 int32_t Component = 0; 659 spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1); 660 int32_t Offset = -1; 661 int32_t ArrayStride = -1; 662 int32_t MatrixStride = 1; 663 664 bool HasLocation : 1; 665 bool HasComponent : 1; 666 bool HasBuiltIn : 1; 667 bool HasOffset : 1; 668 bool HasArrayStride : 1; 669 bool HasMatrixStride : 1; 670 bool HasRowMajor : 1; // whether RowMajor bit is valid. 671 672 bool Flat : 1; 673 bool Centroid : 1; 674 bool NoPerspective : 1; 675 bool Block : 1; 676 bool BufferBlock : 1; 677 bool RelaxedPrecision : 1; 678 bool RowMajor : 1; // RowMajor if true; ColMajor if false 679 bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. 
680 Decorationssw::SpirvShader::Decorations681 Decorations() 682 : Location{ -1 } 683 , Component{ 0 } 684 , BuiltIn{ static_cast<spv::BuiltIn>(-1) } 685 , Offset{ -1 } 686 , ArrayStride{ -1 } 687 , MatrixStride{ -1 } 688 , HasLocation{ false } 689 , HasComponent{ false } 690 , HasBuiltIn{ false } 691 , HasOffset{ false } 692 , HasArrayStride{ false } 693 , HasMatrixStride{ false } 694 , HasRowMajor{ false } 695 , Flat{ false } 696 , Centroid{ false } 697 , NoPerspective{ false } 698 , Block{ false } 699 , BufferBlock{ false } 700 , RelaxedPrecision{ false } 701 , RowMajor{ false } 702 , InsideMatrix{ false } 703 { 704 } 705 706 Decorations(Decorations const &) = default; 707 708 void Apply(Decorations const &src); 709 710 void Apply(spv::Decoration decoration, uint32_t arg); 711 }; 712 713 std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations; 714 std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations; 715 716 struct DescriptorDecorations 717 { 718 int32_t DescriptorSet = -1; 719 int32_t Binding = -1; 720 int32_t InputAttachmentIndex = -1; 721 722 void Apply(DescriptorDecorations const &src); 723 }; 724 725 std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations; 726 std::vector<VkFormat> inputAttachmentFormats; 727 728 struct InterfaceComponent 729 { 730 AttribType Type; 731 732 union 733 { 734 struct 735 { 736 bool Flat : 1; 737 bool Centroid : 1; 738 bool NoPerspective : 1; 739 }; 740 741 uint8_t DecorationBits; 742 }; 743 InterfaceComponentsw::SpirvShader::InterfaceComponent744 InterfaceComponent() 745 : Type{ ATTRIBTYPE_UNUSED } 746 , DecorationBits{ 0 } 747 { 748 } 749 }; 750 751 struct BuiltinMapping 752 { 753 Object::ID Id; 754 uint32_t FirstComponent; 755 uint32_t SizeInComponents; 756 }; 757 758 struct WorkgroupMemory 759 { 760 // allocates a new variable of size bytes with the given identifier. 
		// Allocates 'size' bytes for the variable with the given identifier.
		// Allocations are laid out sequentially; each id may be allocated only once.
		inline void allocate(Object::ID id, uint32_t size)
		{
			uint32_t offset = totalSize;
			auto it = offsets.emplace(id, offset);
			ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
			totalSize += size;
		}
		// returns the byte offset of the variable with the given identifier.
		inline uint32_t offsetOf(Object::ID id) const
		{
			auto it = offsets.find(id);
			ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
			return it->second;
		}
		// returns the total allocated size in bytes.
		inline uint32_t size() const { return totalSize; }

	private:
		uint32_t totalSize = 0;                            // in bytes
		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
	};

	std::vector<InterfaceComponent> inputs;
	std::vector<InterfaceComponent> outputs;

	void emitProlog(SpirvRoutine *routine) const;
	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
	void emitEpilog(SpirvRoutine *routine) const;
	void clearPhis(SpirvRoutine *routine) const;

	bool containsImageWrite() const { return imageWriteEmitted; }

	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
	WorkgroupMemory workgroupMemory;

private:
	const uint32_t codeSerialID;
	Modes modes = {};
	Capabilities capabilities = {};
	HandleMap<Type> types;
	HandleMap<Object> defs;
	HandleMap<Function> functions;
	std::unordered_map<StringID, String> strings;
	HandleMap<Extension> extensionsByID;
	std::unordered_set<uint32_t> extensionsImported;
	Function::ID entryPoint;
	mutable bool imageWriteEmitted = false;

	const bool robustBufferAccess = true;
	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.

	// DeclareType creates a Type for the given OpTypeX instruction, storing
	// it into the types map. It is called from the analysis pass (constructor).
	void DeclareType(InsnIterator insn);

	void ProcessExecutionMode(InsnIterator it);

	uint32_t ComputeTypeSize(InsnIterator insn);
	void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
	void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
	void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const;

	// Creates an Object for the instruction's result in 'defs'.
	void DefineResult(const InsnIterator &insn);

	// Processes the OpenCL.Debug.100 instruction for the initial definition
	// pass of the SPIR-V.
	void DefineOpenCLDebugInfo100(const InsnIterator &insn);

	// Returns true if data in the given storage class is word-interleaved
	// by each SIMD vector lane, otherwise data is stored linearly.
	//
	// Each lane addresses a single word, picked by a base pointer and an
	// integer offset.
	//
	// A word is currently 32 bits (single float, int32_t, uint32_t).
	// A lane is a single element of a SIMD vector register.
	//
	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
	// ---------------------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
	//
	// Assuming SIMD::Width == 4:
	//
	//                  Lane[0]   |  Lane[1]  |  Lane[2]  |  Lane[3]
	//                ===========+===========+===========+==========
	// LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
	// --------------+-----------+-----------+-----------+----------
	// LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
	// --------------+-----------+-----------+-----------+----------
	// LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
	// --------------+-----------+-----------+-----------+----------
	// LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
	//
	//
	// Linear storage - (IsStorageInterleavedByLane() == false):
	// ---------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * LaneOffset
	//
	//                  Lane[0]   |  Lane[1]  |  Lane[2]  |  Lane[3]
	//                ===========+===========+===========+==========
	// LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
	// --------------+-----------+-----------+-----------+----------
	// LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
	// --------------+-----------+-----------+-----------+----------
	// LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
	// --------------+-----------+-----------+-----------+----------
	// LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
	//
	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
	static bool IsExplicitLayout(spv::StorageClass storageClass);

	static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p);

	// Output storage buffers and images should not be affected by helper invocations
	static bool StoresInHelperInvocation(spv::StorageClass storageClass);
	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;

	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;

	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;

	// MemoryElement describes a scalar element within a structure, and is
	// used by the callback function of VisitMemoryObject().
	struct MemoryElement
	{
		uint32_t index;    // index of the scalar element
		uint32_t offset;   // offset (in bytes) from the base of the object
		const Type &type;  // element type
	};

	using MemoryVisitor = std::function<void(const MemoryElement &)>;

	// VisitMemoryObject() walks a type tree in an explicitly laid out
	// storage class, calling the MemoryVisitor for each scalar element
	// within the object.
	void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;

	// VisitMemoryObjectInner() is internally called by VisitMemoryObject()
	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;

	Object &CreateConstant(InsnIterator it);

	void ProcessInterfaceVariable(Object &object);

	// EmitState holds control-flow state for the emit() pass.
	class EmitState
	{
	public:
		EmitState(SpirvRoutine *routine,
		          Function::ID function,
		          RValue<SIMD::Int> activeLaneMask,
		          RValue<SIMD::Int> storesAndAtomicsMask,
		          const vk::DescriptorSet::Bindings &descriptorSets,
		          bool robustBufferAccess,
		          unsigned int multiSampleCount,
		          spv::ExecutionModel executionModel)
		    : routine(routine)
		    , function(function)
		    , activeLaneMaskValue(activeLaneMask.value())
		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
		    , descriptorSets(descriptorSets)
		    , robustBufferAccess(robustBufferAccess)
		    , multiSampleCount(multiSampleCount)
		    , executionModel(executionModel)
		{
			ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0));  // Must parse OpEntryPoint before emitting.
		}

		// Returns the mask describing the active lanes as updated by dynamic
		// control flow. Active lanes include helper invocations, used for
		// calculating fragment derivatives, which must not perform memory
		// stores or atomic writes.
		//
		// Use activeStoresAndAtomicsMask() to consider both control flow and
		// lanes which are permitted to perform memory stores and atomic
		// operations
		RValue<SIMD::Int> activeLaneMask() const
		{
			ASSERT(activeLaneMaskValue != nullptr);
			return RValue<SIMD::Int>(activeLaneMaskValue);
		}

		// Returns the immutable lane mask that describes which lanes are
		// permitted to perform memory stores and atomic operations.
		// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
		// consider lanes that have been made inactive due to control flow.
		RValue<SIMD::Int> storesAndAtomicsMask() const
		{
			ASSERT(storesAndAtomicsMaskValue != nullptr);
			return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
		}

		// Returns a lane mask that describes which lanes are permitted to
		// perform memory stores and atomic operations, considering lanes that
		// may have been made inactive due to control flow.
		RValue<SIMD::Int> activeStoresAndAtomicsMask() const
		{
			return activeLaneMask() & storesAndAtomicsMask();
		}

		// Add a new active lane mask edge from the current block to out.
		// The edge mask value will be (mask AND activeLaneMaskValue).
		// If multiple active lane masks are added for the same edge, then
		// they will be ORed together.
		void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);

		// Add a new active lane mask for the edge from -> to.
		// If multiple active lane masks are added for the same edge, then
		// they will be ORed together.
		void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);

		SpirvRoutine *routine = nullptr;                 // The current routine being built.
		Function::ID function;                           // The current function being built.
		Block::ID block;                                 // The current block being built.
		rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
		rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
		Block::Set visited;                              // Blocks already built.
		std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
		std::deque<Block::ID> *pending;

		const vk::DescriptorSet::Bindings &descriptorSets;

		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;

		unsigned int getMultiSampleCount() const { return multiSampleCount; }

		// Creates the Intermediate for the given object id.
		// Each id may be created exactly once (asserted).
		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
		{
			auto it = intermediates.emplace(std::piecewise_construct,
			                                std::forward_as_tuple(id),
			                                std::forward_as_tuple(componentCount));
			ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
			return it.first->second;
		}

		Intermediate const &getIntermediate(Object::ID id) const
		{
			auto it = intermediates.find(id);
			ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
			return it->second;
		}

		// Records the pointer for the given object id.
		// Each id may be created exactly once (asserted).
		void createPointer(Object::ID id, SIMD::Pointer ptr)
		{
			bool added = pointers.emplace(id, ptr).second;
			ASSERT_MSG(added, "Pointer %d created twice", id.value());
		}

		SIMD::Pointer const &getPointer(Object::ID id) const
		{
			auto it = pointers.find(id);
			ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
			return it->second;
		}

	private:
		std::unordered_map<Object::ID, Intermediate> intermediates;
		std::unordered_map<Object::ID, SIMD::Pointer> pointers;

		const bool robustBufferAccess = true;  // Emit robustBufferAccess safe code.
		const unsigned int multiSampleCount = 0;
		const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
	};

	// EmitResult is an enumerator of result values from the Emit functions.
1032 enum class EmitResult 1033 { 1034 Continue, // No termination instructions. 1035 Terminator, // Reached a termination instruction. 1036 }; 1037 1038 // Generic wrapper over either per-lane intermediate value, or a constant. 1039 // Constants are transparently widened to per-lane values in operator[]. 1040 // This is appropriate in most cases -- if we're not going to do something 1041 // significantly different based on whether the value is uniform across lanes. 1042 class Operand 1043 { 1044 public: 1045 Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId); 1046 Operand(const Intermediate &value); 1047 Float(uint32_t i) const1048 RValue<SIMD::Float> Float(uint32_t i) const 1049 { 1050 if(intermediate) 1051 { 1052 return intermediate->Float(i); 1053 } 1054 1055 // Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact 1056 // bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant". 1057 // Thus we must first construct an integer constant, and bitcast to float. 
1058 return As<SIMD::Float>(SIMD::UInt(constant[i])); 1059 } 1060 Int(uint32_t i) const1061 RValue<SIMD::Int> Int(uint32_t i) const 1062 { 1063 if(intermediate) 1064 { 1065 return intermediate->Int(i); 1066 } 1067 1068 return SIMD::Int(constant[i]); 1069 } 1070 UInt(uint32_t i) const1071 RValue<SIMD::UInt> UInt(uint32_t i) const 1072 { 1073 if(intermediate) 1074 { 1075 return intermediate->UInt(i); 1076 } 1077 1078 return SIMD::UInt(constant[i]); 1079 } 1080 1081 bool isConstantZero() const; 1082 1083 private: 1084 RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;) 1085 1086 // Delegate constructor 1087 Operand(const EmitState *state, const Object &object); 1088 1089 const uint32_t *constant; 1090 const Intermediate *intermediate; 1091 1092 public: 1093 const uint32_t componentCount; 1094 }; 1095 RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)1096 RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;) 1097 1098 Type const &getType(Type::ID id) const 1099 { 1100 auto it = types.find(id); 1101 ASSERT_MSG(it != types.end(), "Unknown type %d", id.value()); 1102 return it->second; 1103 } 1104 getType(const Object & object) const1105 Type const &getType(const Object &object) const 1106 { 1107 return getType(object.typeId()); 1108 } 1109 getObject(Object::ID id) const1110 Object const &getObject(Object::ID id) const 1111 { 1112 auto it = defs.find(id); 1113 ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value()); 1114 return it->second; 1115 } 1116 getFunction(Function::ID id) const1117 Function const &getFunction(Function::ID id) const 1118 { 1119 auto it = functions.find(id); 1120 ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value()); 1121 return it->second; 1122 } 1123 getString(StringID id) const1124 String const &getString(StringID id) const 1125 { 1126 auto it = strings.find(id); 1127 ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value()); 1128 return it->second; 1129 } 1130 getExtension(Extension::ID id) const1131 
Extension const &getExtension(Extension::ID id) const 1132 { 1133 auto it = extensionsByID.find(id); 1134 ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value()); 1135 return it->second; 1136 } 1137 1138 // Returns a SIMD::Pointer to the underlying data for the given pointer 1139 // object. 1140 // Handles objects of the following kinds: 1141 // - DescriptorSet 1142 // - Pointer 1143 // - InterfaceVariable 1144 // Calling GetPointerToData with objects of any other kind will assert. 1145 SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const; 1146 1147 SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const; 1148 SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const; 1149 1150 // Returns the *component* offset in the literal for the given access chain. 1151 uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const; 1152 1153 // Lookup the active lane mask for the edge from -> to. 1154 // If from is unreachable, then a mask of all zeros is returned. 1155 // Asserts if from is reachable and the edge does not exist. 1156 RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const; 1157 1158 // Updates the current active lane mask. 1159 void SetActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const; 1160 1161 // Emit all the unvisited blocks (except for ignore) in DFS order, 1162 // starting with id. 
1163 void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const; 1164 void EmitNonLoop(EmitState *state) const; 1165 void EmitLoop(EmitState *state) const; 1166 1167 void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const; 1168 EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const; 1169 1170 // Emit pass instructions: 1171 EmitResult EmitVariable(InsnIterator insn, EmitState *state) const; 1172 EmitResult EmitLoad(InsnIterator insn, EmitState *state) const; 1173 EmitResult EmitStore(InsnIterator insn, EmitState *state) const; 1174 EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const; 1175 EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const; 1176 EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const; 1177 EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const; 1178 EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const; 1179 EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const; 1180 EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const; 1181 EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const; 1182 EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const; 1183 EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const; 1184 EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const; 1185 EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const; 1186 EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const; 1187 EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const; 1188 EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const; 1189 EmitResult EmitDot(InsnIterator insn, EmitState *state) const; 1190 EmitResult EmitSelect(InsnIterator insn, EmitState *state) const; 1191 EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) 
const; 1192 EmitResult EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const; 1193 EmitResult EmitOpenCLDebugInfo100(InsnIterator insn, EmitState *state) const; 1194 EmitResult EmitLine(InsnIterator insn, EmitState *state) const; 1195 EmitResult EmitAny(InsnIterator insn, EmitState *state) const; 1196 EmitResult EmitAll(InsnIterator insn, EmitState *state) const; 1197 EmitResult EmitBranch(InsnIterator insn, EmitState *state) const; 1198 EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const; 1199 EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const; 1200 EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const; 1201 EmitResult EmitReturn(InsnIterator insn, EmitState *state) const; 1202 EmitResult EmitKill(InsnIterator insn, EmitState *state) const; 1203 EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const; 1204 EmitResult EmitPhi(InsnIterator insn, EmitState *state) const; 1205 EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const; 1206 EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const; 1207 EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const; 1208 EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const; 1209 EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const; 1210 EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const; 1211 EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const; 1212 EmitResult EmitImageQueryLod(InsnIterator insn, EmitState *state) const; 1213 EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const; 1214 EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const; 1215 EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const; 1216 EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const; 1217 
EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const; 1218 EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const; 1219 EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const; 1220 EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const; 1221 EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const; 1222 EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const; 1223 EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const; 1224 EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const; 1225 EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const; 1226 EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const; 1227 1228 // Emits code to sample an image, regardless of whether any SIMD lanes are active. 1229 void EmitImageSampleUnconditional(Array<SIMD::Float> &out, ImageInstruction instruction, InsnIterator insn, EmitState *state) const; 1230 1231 void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const; 1232 SIMD::Pointer GetTexelAddress(EmitState const *state, Pointer<Byte> imageBase, Int imageSizeInBytes, Operand const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect, OutOfBoundsBehavior outOfBoundsBehavior) const; 1233 uint32_t GetConstScalarInt(Object::ID id) const; 1234 void EvalSpecConstantOp(InsnIterator insn); 1235 void EvalSpecConstantUnaryOp(InsnIterator insn); 1236 void EvalSpecConstantBinaryOp(InsnIterator insn); 1237 1238 // Fragment input interpolation functions 1239 uint32_t GetNumInputComponents(int32_t location) const; 1240 enum InterpolationType 1241 { 1242 Centroid, 1243 AtSample, 1244 AtOffset, 1245 }; 1246 SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId, uint32_t component, 1247 uint32_t 
component_count, EmitState *state, InterpolationType type) const; 1248 1249 // Helper for implementing OpStore, which doesn't take an InsnIterator so it 1250 // can also store independent operands. 1251 void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const; 1252 1253 // LoadPhi loads the phi values from the alloca storage and places the 1254 // load values into the intermediate with the phi's result id. 1255 void LoadPhi(InsnIterator insn, EmitState *state) const; 1256 1257 // StorePhi updates the phi's alloca storage value using the incoming 1258 // values from blocks that are both in the OpPhi instruction and in 1259 // filter. 1260 void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const; 1261 1262 // Emits a rr::Fence for the given MemorySemanticsMask. 1263 void Fence(spv::MemorySemanticsMask semantics) const; 1264 1265 // Helper for calling rr::Yield with res cast to an rr::Int. 1266 void Yield(YieldResult res) const; 1267 1268 // WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's 1269 // control flow to the given file path. 1270 void WriteCFGGraphVizDotFile(const char *path) const; 1271 1272 // OpcodeName() returns the name of the opcode op. 1273 static const char *OpcodeName(spv::Op op); 1274 static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics); 1275 1276 // IsStatement() returns true if the given opcode actually performs 1277 // work (as opposed to declaring a type, defining a function start / end, 1278 // etc). 1279 static bool IsStatement(spv::Op op); 1280 1281 // HasTypeAndResult() returns true if the given opcode's instruction 1282 // has a result type ID and result ID, i.e. defines an Object. 1283 static bool HasTypeAndResult(spv::Op op); 1284 1285 // Helper as we often need to take dot products as part of doing other things. 
1286 SIMD::Float Dot(unsigned numComponents, Operand const &x, Operand const &y) const; 1287 1288 // Splits x into a floating-point significand in the range [0.5, 1.0) 1289 // and an integral exponent of two, such that: 1290 // x = significand * 2^exponent 1291 // Returns the pair <significand, exponent> 1292 std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const; 1293 1294 static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler); 1295 static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState); 1296 1297 // TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly. 1298 static sw::FilterType convertFilterMode(const vk::Sampler *sampler, VkImageViewType imageViewType, ImageInstruction instruction); 1299 static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler); 1300 static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::Sampler *sampler, VkImageViewType imageViewType); 1301 1302 // Returns 0 when invalid. 1303 static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model); 1304 1305 // Debugger API functions. When ENABLE_VK_DEBUGGER is not defined, these 1306 // are all no-ops. 1307 1308 // dbgInit() initializes the debugger code generation. 1309 // All other dbgXXX() functions are no-op until this is called. 1310 void dbgInit(const std::shared_ptr<vk::dbg::Context> &dbgctx); 1311 1312 // dbgTerm() terminates the debugger code generation. 1313 void dbgTerm(); 1314 1315 // dbgCreateFile() generates a synthetic file containing the disassembly 1316 // of the SPIR-V shader. This is the file displayed in the debug 1317 // session. 1318 void dbgCreateFile(); 1319 1320 // dbgBeginEmit() sets up the debugging state for the shader. 1321 void dbgBeginEmit(EmitState *state) const; 1322 1323 // dbgEndEmit() tears down the debugging state for the shader. 
1324 void dbgEndEmit(EmitState *state) const; 1325 1326 // dbgBeginEmitInstruction() updates the current debugger location for 1327 // the given instruction. 1328 void dbgBeginEmitInstruction(InsnIterator insn, EmitState *state) const; 1329 1330 // dbgEndEmitInstruction() creates any new debugger variables for the 1331 // instruction that just completed. 1332 void dbgEndEmitInstruction(InsnIterator insn, EmitState *state) const; 1333 1334 // dbgExposeIntermediate() exposes the intermediate with the given ID to 1335 // the debugger. 1336 void dbgExposeIntermediate(Object::ID id, EmitState *state) const; 1337 1338 // dbgUpdateActiveLaneMask() updates the active lane masks to the 1339 // debugger. 1340 void dbgUpdateActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const; 1341 1342 // dbgDeclareResult() associates resultId as the result of the given 1343 // instruction. 1344 void dbgDeclareResult(const InsnIterator &insn, Object::ID resultId) const; 1345 1346 // Impl holds forward declaration structs and pointers to state for the 1347 // private implementations in the corresponding SpirvShaderXXX.cpp files. 1348 // This allows access to the private members of the SpirvShader, without 1349 // littering the header with implementation details. 
1350 struct Impl 1351 { 1352 struct Debugger; 1353 struct Group; 1354 Debugger *debugger = nullptr; 1355 }; 1356 Impl impl; 1357 }; 1358 1359 class SpirvRoutine 1360 { 1361 public: 1362 SpirvRoutine(vk::PipelineLayout const *pipelineLayout); 1363 1364 using Variable = Array<SIMD::Float>; 1365 1366 struct SamplerCache 1367 { 1368 Pointer<Byte> imageDescriptor = nullptr; 1369 Pointer<Byte> sampler; 1370 Pointer<Byte> function; 1371 }; 1372 1373 struct InterpolationData 1374 { 1375 Pointer<Byte> primitive; 1376 SIMD::Float x; 1377 SIMD::Float y; 1378 SIMD::Float rhw; 1379 SIMD::Float xCentroid; 1380 SIMD::Float yCentroid; 1381 SIMD::Float rhwCentroid; 1382 }; 1383 1384 vk::PipelineLayout const *const pipelineLayout; 1385 1386 std::unordered_map<SpirvShader::Object::ID, Variable> variables; 1387 std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache; 1388 Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS }; 1389 Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS }; 1390 InterpolationData interpolationData; 1391 1392 Pointer<Byte> workgroupMemory; 1393 Pointer<Pointer<Byte>> descriptorSets; 1394 Pointer<Int> descriptorDynamicOffsets; 1395 Pointer<Byte> pushConstants; 1396 Pointer<Byte> constants; 1397 Int killMask = Int{ 0 }; 1398 1399 // Shader invocation state. 1400 // Not all of these variables are used for every type of shader, and some 1401 // are only used when debugging. See b/146486064 for more information. 1402 // Give careful consideration to the runtime performance loss before adding 1403 // more state here. 
1404 std::array<SIMD::Int, 2> windowSpacePosition; 1405 Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex 1406 Int instanceID; 1407 SIMD::Int vertexIndex; 1408 std::array<SIMD::Float, 4> fragCoord; 1409 std::array<SIMD::Float, 4> pointCoord; 1410 SIMD::Int helperInvocation; 1411 Int4 numWorkgroups; 1412 Int4 workgroupID; 1413 Int4 workgroupSize; 1414 Int subgroupsPerWorkgroup; 1415 Int invocationsPerSubgroup; 1416 Int subgroupIndex; 1417 SIMD::Int localInvocationIndex; 1418 std::array<SIMD::Int, 3> localInvocationID; 1419 std::array<SIMD::Int, 3> globalInvocationID; 1420 1421 Pointer<Byte> dbgState; // Pointer to a debugger state. 1422 createVariable(SpirvShader::Object::ID id,uint32_t componentCount)1423 void createVariable(SpirvShader::Object::ID id, uint32_t componentCount) 1424 { 1425 bool added = variables.emplace(id, Variable(componentCount)).second; 1426 ASSERT_MSG(added, "Variable %d created twice", id.value()); 1427 } 1428 getVariable(SpirvShader::Object::ID id)1429 Variable &getVariable(SpirvShader::Object::ID id) 1430 { 1431 auto it = variables.find(id); 1432 ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value()); 1433 return it->second; 1434 } 1435 1436 // setImmutableInputBuiltins() sets all the immutable input builtins, 1437 // common for all shader types. 1438 void setImmutableInputBuiltins(SpirvShader const *shader); 1439 1440 static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective); 1441 1442 // setInputBuiltin() calls f() with the builtin and value if the shader 1443 // uses the input builtin, otherwise the call is a no-op. 
1444 // F is a function with the signature: 1445 // void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) 1446 template<typename F> setInputBuiltin(SpirvShader const * shader,spv::BuiltIn id,F && f)1447 inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f) 1448 { 1449 auto it = shader->inputBuiltins.find(id); 1450 if(it != shader->inputBuiltins.end()) 1451 { 1452 const auto &builtin = it->second; 1453 f(builtin, getVariable(builtin.Id)); 1454 } 1455 } 1456 1457 private: 1458 // The phis are only accessible to SpirvShader as they are only used and 1459 // exist between calls to SpirvShader::emitProlog() and 1460 // SpirvShader::emitEpilog(). 1461 friend class SpirvShader; 1462 1463 std::unordered_map<SpirvShader::Object::ID, Variable> phis; 1464 }; 1465 1466 } // namespace sw 1467 1468 #endif // sw_SpirvShader_hpp 1469