1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_SpirvShader_hpp 16 #define sw_SpirvShader_hpp 17 18 #include "SamplerCore.hpp" 19 #include "ShaderCore.hpp" 20 #include "SpirvBinary.hpp" 21 #include "SpirvID.hpp" 22 #include "Device/Config.hpp" 23 #include "Device/Sampler.hpp" 24 #include "System/Debug.hpp" 25 #include "System/Math.hpp" 26 #include "System/Types.hpp" 27 #include "Vulkan/VkConfig.hpp" 28 #include "Vulkan/VkDescriptorSet.hpp" 29 30 #define SPV_ENABLE_UTILITY_CODE 31 #include <spirv/unified1/spirv.hpp> 32 33 #include <array> 34 #include <atomic> 35 #include <cstdint> 36 #include <cstring> 37 #include <deque> 38 #include <functional> 39 #include <memory> 40 #include <string> 41 #include <type_traits> 42 #include <unordered_map> 43 #include <unordered_set> 44 #include <vector> 45 46 #undef Yield // b/127920555 47 48 namespace vk { 49 50 class Device; 51 class PipelineLayout; 52 class ImageView; 53 class Sampler; 54 class RenderPass; 55 struct SampledImageDescriptor; 56 struct SamplerState; 57 58 } // namespace vk 59 60 namespace sw { 61 62 // Forward declarations. 
// Forward declarations.
class SpirvRoutine;

// Incrementally constructed complex bundle of rvalues
// Effectively a restricted vector, supporting only:
// - allocation to a (runtime-known) fixed component count
// - in-place construction of elements
// - const operator[]
class Intermediate
{
public:
    // Allocates storage for 'componentCount' scalar components, all
    // initially unassigned (nullptr).
    Intermediate(uint32_t componentCount)
        : componentCount(componentCount)
        , scalar(new rr::Value *[componentCount])
    {
        for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; }
    }

    ~Intermediate()
    {
        delete[] scalar;
    }

    // TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to
    // decide the format used to print the intermediate data.
    enum class TypeHint
    {
        Float,
        Int,
        UInt
    };

    // In-place construction of component i from an rvalue. Each component
    // may be written exactly once (enforced by emplace()).
    void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); }
    void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); }
    void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); }

    void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); }
    void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); }
    void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); }

    // Value retrieval functions.
    // Component i must have been assigned via move() before retrieval.
    RValue<SIMD::Float> Float(uint32_t i) const
    {
        ASSERT(i < componentCount);
        ASSERT(scalar[i] != nullptr);
        return As<SIMD::Float>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Float>(scalar)
    }

    RValue<SIMD::Int> Int(uint32_t i) const
    {
        ASSERT(i < componentCount);
        ASSERT(scalar[i] != nullptr);
        return As<SIMD::Int>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Int>(scalar)
    }

    RValue<SIMD::UInt> UInt(uint32_t i) const
    {
        ASSERT(i < componentCount);
        ASSERT(scalar[i] != nullptr);
        return As<SIMD::UInt>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
    }

    // No copy/move construction or assignment
    Intermediate(const Intermediate &) = delete;
    Intermediate(Intermediate &&) = delete;
    Intermediate &operator=(const Intermediate &) = delete;
    Intermediate &operator=(Intermediate &&) = delete;

    const uint32_t componentCount;

private:
    // Stores the value for component i, asserting it has not already been
    // assigned. The type hint is only retained in RR_PRINT builds.
    void emplace(uint32_t i, rr::Value *value, TypeHint type)
    {
        ASSERT(i < componentCount);
        ASSERT(scalar[i] == nullptr);
        scalar[i] = value;
        RR_PRINT_ONLY(typeHint = type;)
    }

    // Per-component rvalues; entries remain nullptr until move()d into.
    rr::Value **const scalar;

#ifdef ENABLE_RR_PRINT
    friend struct rr::PrintValue::Ty<sw::Intermediate>;
    TypeHint typeHint = TypeHint::Float;
#endif  // ENABLE_RR_PRINT
};

// The Spirv class parses a SPIR-V binary and provides utilities for retrieving
// information about instructions, objects, types, etc.
class Spirv
{
public:
    Spirv(VkShaderStageFlagBits stage,
          const char *entryPointName,
          const SpirvBinary &insns);

    ~Spirv();

    SpirvBinary insns;

    class Type;
    class Object;

    // Pseudo-iterator over SPIR-V instructions, designed to support range-based-for.
166 class InsnIterator 167 { 168 public: 169 InsnIterator() = default; 170 InsnIterator(const InsnIterator &other) = default; 171 InsnIterator &operator=(const InsnIterator &other) = default; 172 InsnIterator(SpirvBinary::const_iterator iter)173 explicit InsnIterator(SpirvBinary::const_iterator iter) 174 : iter{ iter } 175 { 176 } 177 opcode() const178 spv::Op opcode() const 179 { 180 return static_cast<spv::Op>(*iter & spv::OpCodeMask); 181 } 182 wordCount() const183 uint32_t wordCount() const 184 { 185 return *iter >> spv::WordCountShift; 186 } 187 word(uint32_t n) const188 uint32_t word(uint32_t n) const 189 { 190 ASSERT(n < wordCount()); 191 return iter[n]; 192 } 193 data() const194 const uint32_t *data() const 195 { 196 return &iter[0]; 197 } 198 string(uint32_t n) const199 const char *string(uint32_t n) const 200 { 201 return reinterpret_cast<const char *>(&iter[n]); 202 } 203 204 // Returns the number of whole-words that a string literal starting at 205 // word n consumes. If the end of the intruction is reached before the 206 // null-terminator is found, then the function DABORT()s and 0 is 207 // returned. stringSizeInWords(uint32_t n) const208 uint32_t stringSizeInWords(uint32_t n) const 209 { 210 uint32_t c = wordCount(); 211 for(uint32_t i = n; n < c; i++) 212 { 213 const char *s = string(i); 214 // SPIR-V spec 2.2.1. Instructions: 215 // A string is interpreted as a nul-terminated stream of 216 // characters. The character set is Unicode in the UTF-8 217 // encoding scheme. The UTF-8 octets (8-bit bytes) are packed 218 // four per word, following the little-endian convention (i.e., 219 // the first octet is in the lowest-order 8 bits of the word). 220 // The final word contains the string's nul-termination 221 // character (0), and all contents past the end of the string in 222 // the final word are padded with 0. 
223 if(s[3] == 0) 224 { 225 return 1 + i - n; 226 } 227 } 228 DABORT("SPIR-V string literal was not null-terminated"); 229 return 0; 230 } 231 hasResultAndType() const232 bool hasResultAndType() const 233 { 234 bool hasResult = false, hasResultType = false; 235 spv::HasResultAndType(opcode(), &hasResult, &hasResultType); 236 237 return hasResultType; 238 } 239 resultTypeId() const240 SpirvID<Type> resultTypeId() const 241 { 242 ASSERT(hasResultAndType()); 243 return word(1); 244 } 245 resultId() const246 SpirvID<Object> resultId() const 247 { 248 ASSERT(hasResultAndType()); 249 return word(2); 250 } 251 distanceFrom(const InsnIterator & other) const252 uint32_t distanceFrom(const InsnIterator &other) const 253 { 254 return static_cast<uint32_t>(iter - other.iter); 255 } 256 operator ==(const InsnIterator & other) const257 bool operator==(const InsnIterator &other) const 258 { 259 return iter == other.iter; 260 } 261 operator !=(const InsnIterator & other) const262 bool operator!=(const InsnIterator &other) const 263 { 264 return iter != other.iter; 265 } 266 operator *() const267 InsnIterator operator*() const 268 { 269 return *this; 270 } 271 operator ++()272 InsnIterator &operator++() 273 { 274 iter += wordCount(); 275 return *this; 276 } 277 operator ++(int)278 InsnIterator const operator++(int) 279 { 280 InsnIterator ret{ *this }; 281 iter += wordCount(); 282 return ret; 283 } 284 285 private: 286 SpirvBinary::const_iterator iter; 287 }; 288 289 // Range-based-for interface begin() const290 InsnIterator begin() const 291 { 292 // Skip over the header words 293 return InsnIterator{ insns.cbegin() + 5 }; 294 } 295 end() const296 InsnIterator end() const 297 { 298 return InsnIterator{ insns.cend() }; 299 } 300 301 // A range of contiguous instruction words. 
    struct Span
    {
        Span(const InsnIterator &insn, uint32_t offset, uint32_t size)
            : insn(insn)
            , offset(offset)
            , wordCount(size)
        {}

        // Returns the index'th word of the span (relative to 'offset').
        uint32_t operator[](uint32_t index) const
        {
            ASSERT(index < wordCount);
            return insn.word(offset + index);
        }

        uint32_t size() const
        {
            return wordCount;
        }

    private:
        // NOTE: holds a reference to the instruction iterator — the Span
        // must not outlive the InsnIterator it was constructed from.
        const InsnIterator &insn;
        const uint32_t offset;
        const uint32_t wordCount;
    };

    class Type
    {
    public:
        using ID = SpirvID<Type>;

        spv::Op opcode() const { return definition.opcode(); }

        InsnIterator definition;
        spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
        uint32_t componentCount = 0;
        bool isBuiltInBlock = false;

        // Inner element type for pointers, arrays, vectors and matrices.
        ID element;
    };

    class Object
    {
    public:
        using ID = SpirvID<Object>;

        spv::Op opcode() const { return definition.opcode(); }
        Type::ID typeId() const { return definition.resultTypeId(); }
        Object::ID id() const { return definition.resultId(); }

        bool isConstantZero() const;

        InsnIterator definition;
        std::vector<uint32_t> constantValue;

        enum class Kind
        {
            // Invalid default kind.
            // If we get left with an object in this state, the module was
            // broken.
            Unknown,

            // TODO: Better document this kind.
            // A shader interface variable pointer.
            // Pointer with uniform address across all lanes.
            // Pointer held by SpirvRoutine::pointers
            InterfaceVariable,

            // Constant value held by Object::constantValue.
            Constant,

            // Value held by SpirvRoutine::intermediates.
            Intermediate,

            // Pointer held by SpirvRoutine::pointers
            Pointer,

            // Combination of an image pointer and a sampler ID
            SampledImage,

            // A pointer to a vk::DescriptorSet*.
            // Pointer held by SpirvRoutine::pointers.
            DescriptorSet,
        };

        Kind kind = Kind::Unknown;
    };

    // Block is an interval of SPIR-V instructions, starting with the
    // opening OpLabel, and ending with a termination instruction.
    class Block
    {
    public:
        using ID = SpirvID<Block>;
        using Set = std::unordered_set<ID>;

        // Edge represents the graph edge between two blocks.
        struct Edge
        {
            ID from;
            ID to;

            bool operator==(const Edge &other) const { return from == other.from && to == other.to; }

            // Hash functor for using Edge as an unordered-container key.
            struct Hash
            {
                std::size_t operator()(const Edge &edge) const noexcept
                {
                    return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
                }
            };
        };

        Block() = default;
        Block(const Block &other) = default;
        Block &operator=(const Block &other) = default;
        explicit Block(InsnIterator begin, InsnIterator end);

        /* range-based-for interface */
        inline InsnIterator begin() const { return begin_; }
        inline InsnIterator end() const { return end_; }

        enum Kind
        {
            Simple,                         // OpBranch or other simple terminator.
            StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
            UnstructuredBranchConditional,  // OpBranchConditional
            StructuredSwitch,               // OpSelectionMerge + OpSwitch
            UnstructuredSwitch,             // OpSwitch
            Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
        };

        Kind kind = Simple;
        InsnIterator mergeInstruction;   // Structured control flow merge instruction.
        InsnIterator branchInstruction;  // Branch instruction.
        ID mergeBlock;                   // Structured flow merge block.
        ID continueTarget;               // Loop continue block.
        Set ins;                         // Blocks that branch into this block.
        Set outs;                        // Blocks that this block branches to.
        bool isLoopMerge = false;

    private:
        InsnIterator begin_;
        InsnIterator end_;
    };

    class Function
    {
    public:
        using ID = SpirvID<Function>;

        // Walks all the reachable blocks starting from id, adding them to
        // reachable.
        void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;

        // AssignBlockFields() performs the following for all reachable blocks:
        // * Assigns Block::ins with the identifiers of all blocks that contain
        //   this block in their Block::outs.
        // * Sets Block::isLoopMerge to true if the block is the merge of
        //   another loop block.
        void AssignBlockFields();

        // ForeachBlockDependency calls f with each dependency of the given
        // block. A dependency is an incoming block that is not a loop-back
        // edge.
        void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;

        // ExistsPath returns true if there's a direct or indirect flow from
        // the 'from' block to the 'to' block that does not pass through
        // notPassingThrough.
        bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;

        // Looks up a block by id; the block must exist.
        const Block &getBlock(Block::ID id) const
        {
            auto it = blocks.find(id);
            ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
            return it->second;
        }

        Block::ID entry;          // function entry point block.
        HandleMap<Block> blocks;  // blocks belonging to this function.
        Type::ID type;            // type of the function.
        Type::ID result;          // return type.
    };

    using String = std::string;
    using StringID = SpirvID<std::string>;

    class Extension
    {
    public:
        using ID = SpirvID<Extension>;

        enum Name
        {
            Unknown,
            GLSLstd450,
            OpenCLDebugInfo100,
            NonSemanticInfo,
        };

        Name name;
    };

    struct TypeOrObject
    {};

    // TypeOrObjectID is an identifier that represents a Type or an Object,
    // and supports implicit casting to and from Type::ID or Object::ID.
    class TypeOrObjectID : public SpirvID<TypeOrObject>
    {
    public:
        using Hash = std::hash<SpirvID<TypeOrObject>>;

        inline TypeOrObjectID(uint32_t id)
            : SpirvID(id)
        {}
        inline TypeOrObjectID(Type::ID id)
            : SpirvID(id.value())
        {}
        inline TypeOrObjectID(Object::ID id)
            : SpirvID(id.value())
        {}
        inline operator Type::ID() const { return Type::ID(value()); }
        inline operator Object::ID() const { return Object::ID(value()); }
    };

    // This method is for retrieving an ID that uniquely identifies the
    // shader entry point represented by this object.
    // Combines the entry point id (high 32 bits) with the binary's own
    // identifier (low 32 bits) into a unique shader key.
    uint64_t getIdentifier() const
    {
        return ((uint64_t)entryPoint.value() << 32) | insns.getIdentifier();
    }

    struct ExecutionModes
    {
        bool EarlyFragmentTests : 1;
        bool DepthReplacing : 1;
        bool DepthGreater : 1;
        bool DepthLess : 1;
        bool DepthUnchanged : 1;
        bool StencilRefReplacing : 1;

        // Compute workgroup dimensions
        Object::ID WorkgroupSizeX = 1;
        Object::ID WorkgroupSizeY = 1;
        Object::ID WorkgroupSizeZ = 1;
        bool useWorkgroupSizeId = false;
    };

    const ExecutionModes &getExecutionModes() const
    {
        return executionModes;
    }

    // Facts gathered about the module during the analysis pass.
    struct Analysis
    {
        bool ContainsDiscard : 1;  // OpKill, OpTerminateInvocation, or OpDemoteToHelperInvocation
        bool ContainsControlBarriers : 1;
        bool NeedsCentroid : 1;
        bool ContainsSampleQualifier : 1;
        bool ContainsImageWrite : 1;
    };

    const Analysis &getAnalysis() const { return analysis; }
    bool containsImageWrite() const { return analysis.ContainsImageWrite; }

    // Returns true if the shader can alter the coverage mask, either by
    // discarding fragments or by writing the SampleMask built-in.
    bool coverageModified() const
    {
        return analysis.ContainsDiscard ||
               (outputBuiltins.find(spv::BuiltInSampleMask) != outputBuiltins.end());
    }

    // SPIR-V capabilities declared (via OpCapability) by the module.
    struct Capabilities
    {
        bool Matrix : 1;
        bool Shader : 1;
        bool StorageImageMultisample : 1;
        bool ClipDistance : 1;
        bool CullDistance : 1;
        bool ImageCubeArray : 1;
        bool SampleRateShading : 1;
        bool InputAttachment : 1;
        bool Sampled1D : 1;
        bool Image1D : 1;
        bool SampledBuffer : 1;
        bool SampledCubeArray : 1;
        bool ImageBuffer : 1;
        bool ImageMSArray : 1;
        bool StorageImageExtendedFormats : 1;
        bool ImageQuery : 1;
        bool DerivativeControl : 1;
        bool DotProductInputAll : 1;
        bool DotProductInput4x8Bit : 1;
        bool DotProductInput4x8BitPacked : 1;
        bool DotProduct : 1;
        bool InterpolationFunction : 1;
        bool StorageImageWriteWithoutFormat : 1;
        bool GroupNonUniform : 1;
        bool GroupNonUniformVote : 1;
        bool GroupNonUniformBallot : 1;
        bool GroupNonUniformShuffle : 1;
        bool GroupNonUniformShuffleRelative : 1;
        bool GroupNonUniformArithmetic : 1;
        bool GroupNonUniformQuad : 1;
        bool DeviceGroup : 1;
        bool MultiView : 1;
        bool SignedZeroInfNanPreserve : 1;
        bool DemoteToHelperInvocation : 1;
        bool StencilExportEXT : 1;
        bool VulkanMemoryModel : 1;
        bool VulkanMemoryModelDeviceScope : 1;
        bool ShaderNonUniform : 1;
        bool RuntimeDescriptorArray : 1;
        bool StorageBufferArrayNonUniformIndexing : 1;
        bool StorageTexelBufferArrayNonUniformIndexing : 1;
        bool StorageTexelBufferArrayDynamicIndexing : 1;
        bool UniformTexelBufferArrayNonUniformIndexing : 1;
        bool UniformTexelBufferArrayDynamicIndexing : 1;
        bool UniformBufferArrayNonUniformIndex : 1;
        bool SampledImageArrayNonUniformIndexing : 1;
        bool StorageImageArrayNonUniformIndexing : 1;
        bool PhysicalStorageBufferAddresses : 1;
    };

    const Capabilities &getUsedCapabilities() const
    {
        return capabilities;
    }

    // getNumOutputClipDistances() returns the number of ClipDistances
    // outputted by this shader.
    unsigned int getNumOutputClipDistances() const
    {
        if(getUsedCapabilities().ClipDistance)
        {
            auto it = outputBuiltins.find(spv::BuiltInClipDistance);
            if(it != outputBuiltins.end())
            {
                return it->second.SizeInComponents;
            }
        }
        return 0;
    }

    // getNumOutputCullDistances() returns the number of CullDistances
    // outputted by this shader.
getNumOutputCullDistances() const649 unsigned int getNumOutputCullDistances() const 650 { 651 if(getUsedCapabilities().CullDistance) 652 { 653 auto it = outputBuiltins.find(spv::BuiltInCullDistance); 654 if(it != outputBuiltins.end()) 655 { 656 return it->second.SizeInComponents; 657 } 658 } 659 return 0; 660 } 661 662 enum AttribType : unsigned char 663 { 664 ATTRIBTYPE_FLOAT, 665 ATTRIBTYPE_INT, 666 ATTRIBTYPE_UINT, 667 ATTRIBTYPE_UNUSED, 668 669 ATTRIBTYPE_LAST = ATTRIBTYPE_UINT 670 }; 671 hasBuiltinInput(spv::BuiltIn b) const672 bool hasBuiltinInput(spv::BuiltIn b) const 673 { 674 return inputBuiltins.find(b) != inputBuiltins.end(); 675 } 676 hasBuiltinOutput(spv::BuiltIn b) const677 bool hasBuiltinOutput(spv::BuiltIn b) const 678 { 679 return outputBuiltins.find(b) != outputBuiltins.end(); 680 } 681 682 struct Decorations 683 { 684 int32_t Location = -1; 685 int32_t Component = 0; 686 spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1); 687 int32_t Offset = -1; 688 int32_t ArrayStride = -1; 689 int32_t MatrixStride = 1; 690 691 bool HasLocation : 1; 692 bool HasComponent : 1; 693 bool HasBuiltIn : 1; 694 bool HasOffset : 1; 695 bool HasArrayStride : 1; 696 bool HasMatrixStride : 1; 697 bool HasRowMajor : 1; // whether RowMajor bit is valid. 698 699 bool Flat : 1; 700 bool Centroid : 1; 701 bool NoPerspective : 1; 702 bool Block : 1; 703 bool BufferBlock : 1; 704 bool RelaxedPrecision : 1; 705 bool RowMajor : 1; // RowMajor if true; ColMajor if false 706 bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. 
707 bool NonUniform : 1; 708 Decorationssw::Spirv::Decorations709 Decorations() 710 : Location{ -1 } 711 , Component{ 0 } 712 , BuiltIn{ static_cast<spv::BuiltIn>(-1) } 713 , Offset{ -1 } 714 , ArrayStride{ -1 } 715 , MatrixStride{ -1 } 716 , HasLocation{ false } 717 , HasComponent{ false } 718 , HasBuiltIn{ false } 719 , HasOffset{ false } 720 , HasArrayStride{ false } 721 , HasMatrixStride{ false } 722 , HasRowMajor{ false } 723 , Flat{ false } 724 , Centroid{ false } 725 , NoPerspective{ false } 726 , Block{ false } 727 , BufferBlock{ false } 728 , RelaxedPrecision{ false } 729 , RowMajor{ false } 730 , InsideMatrix{ false } 731 , NonUniform{ false } 732 { 733 } 734 735 Decorations(const Decorations &) = default; 736 737 void Apply(const Decorations &src); 738 739 void Apply(spv::Decoration decoration, uint32_t arg); 740 }; 741 742 std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations; 743 std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations; 744 745 struct DescriptorDecorations 746 { 747 int32_t DescriptorSet = -1; 748 int32_t Binding = -1; 749 int32_t InputAttachmentIndex = -1; 750 751 void Apply(const DescriptorDecorations &src); 752 }; 753 754 std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations; 755 756 struct InterfaceComponent 757 { 758 AttribType Type; 759 760 union 761 { 762 struct 763 { 764 bool Flat : 1; 765 bool Centroid : 1; 766 bool NoPerspective : 1; 767 }; 768 769 uint8_t DecorationBits; 770 }; 771 InterfaceComponentsw::Spirv::InterfaceComponent772 InterfaceComponent() 773 : Type{ ATTRIBTYPE_UNUSED } 774 , DecorationBits{ 0 } 775 { 776 } 777 }; 778 779 struct BuiltinMapping 780 { 781 Object::ID Id; 782 uint32_t FirstComponent; 783 uint32_t SizeInComponents; 784 }; 785 786 struct WorkgroupMemory 787 { 788 // allocates a new variable of size bytes with the given identifier. 
        inline void allocate(Object::ID id, uint32_t size)
        {
            // New variables are appended at the current end of the pool.
            uint32_t offset = totalSize;
            auto it = offsets.emplace(id, offset);
            ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
            totalSize += size;
        }
        // returns the byte offset of the variable with the given identifier.
        inline uint32_t offsetOf(Object::ID id) const
        {
            auto it = offsets.find(id);
            ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
            return it->second;
        }
        // returns the total allocated size in bytes.
        inline uint32_t size() const { return totalSize; }

    private:
        uint32_t totalSize = 0;                            // in bytes
        std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
    };

    std::vector<InterfaceComponent> inputs;
    std::vector<InterfaceComponent> outputs;

    uint32_t getWorkgroupSizeX() const;
    uint32_t getWorkgroupSizeY() const;
    uint32_t getWorkgroupSizeZ() const;

    using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
    std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
    std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
    WorkgroupMemory workgroupMemory;

    Function::ID entryPoint;
    spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
    ExecutionModes executionModes = {};
    Capabilities capabilities = {};
    spv::AddressingModel addressingModel = spv::AddressingModelLogical;
    spv::MemoryModel memoryModel = spv::MemoryModelSimple;
    HandleMap<Extension> extensionsByID;
    std::unordered_set<uint32_t> extensionsImported;

    Analysis analysis = {};

    HandleMap<Type> types;
    HandleMap<Object> defs;

    // TODO(b/247020580): Encapsulate
public:
    HandleMap<Function> functions;
    std::unordered_map<StringID, String> strings;

    // DeclareType creates a Type for the given OpTypeX instruction, storing
    // it into the types map. It is called from the analysis pass (constructor).
    void DeclareType(InsnIterator insn);

    void ProcessExecutionMode(InsnIterator it);

    uint32_t ComputeTypeSize(InsnIterator insn);
    Decorations GetDecorationsForId(TypeOrObjectID id) const;
    void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
    void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
    void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, const Span &indexIds) const;

    // Creates an Object for the instruction's result in 'defs'.
    void DefineResult(const InsnIterator &insn);

    using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;

    void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;

    int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;

    // MemoryElement describes a scalar element within a structure, and is
    // used by the callback function of VisitMemoryObject().
    struct MemoryElement
    {
        uint32_t index;    // index of the scalar element
        uint32_t offset;   // offset (in bytes) from the base of the object
        const Type &type;  // element type
    };

    using MemoryVisitor = std::function<void(const MemoryElement &)>;

    // VisitMemoryObject() walks a type tree in an explicitly laid out
    // storage class, calling the MemoryVisitor for each scalar element
    // within the object.
    void VisitMemoryObject(Object::ID id, bool resultIsPointer, const MemoryVisitor &v) const;

    // VisitMemoryObjectInner() is internally called by VisitMemoryObject()
    void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, bool resultIsPointer, const MemoryVisitor &v) const;

    Object &CreateConstant(InsnIterator it);

    void ProcessInterfaceVariable(Object &object);

    // Looks up a type by id; the type must exist.
    const Type &getType(Type::ID id) const
    {
        auto it = types.find(id);
        ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
        return it->second;
    }

    const Type &getType(const Object &object) const
    {
        return getType(object.typeId());
    }

    // Looks up an object by id; the object must exist.
    const Object &getObject(Object::ID id) const
    {
        auto it = defs.find(id);
        ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
        return it->second;
    }

    const Type &getObjectType(Object::ID id) const
    {
        return getType(getObject(id));
    }

    const Function &getFunction(Function::ID id) const
    {
        auto it = functions.find(id);
        ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
        return it->second;
    }

    const String &getString(StringID id) const
    {
        auto it = strings.find(id);
        ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value());
        return it->second;
    }

    const Extension &getExtension(Extension::ID id) const
    {
        auto it = extensionsByID.find(id);
        ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value());
        return it->second;
    }

    // Returns the *component* offset in the literal for the given access chain.
    uint32_t WalkLiteralAccessChain(Type::ID id, const Span &indexes) const;

    uint32_t GetConstScalarInt(Object::ID id) const;
    void EvalSpecConstantOp(InsnIterator insn);
    void EvalSpecConstantUnaryOp(InsnIterator insn);
    void EvalSpecConstantBinaryOp(InsnIterator insn);

    // Fragment input interpolation functions
    uint32_t GetNumInputComponents(int32_t location) const;
    uint32_t GetPackedInterpolant(int32_t location) const;

    // WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
    // control flow to the given file path.
    void WriteCFGGraphVizDotFile(const char *path) const;

    // OpcodeName() returns the name of the opcode op.
    static const char *OpcodeName(spv::Op opcode);
    static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

    // IsStatement() returns true if the given opcode actually performs
    // work (as opposed to declaring a type, defining a function start / end,
    // etc).
    static bool IsStatement(spv::Op opcode);

    // HasTypeAndResult() returns true if the given opcode's instruction
    // has a result type ID and result ID, i.e. defines an Object.
    static bool HasTypeAndResult(spv::Op opcode);

    // Returns 0 when invalid.
    static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);

    static bool StoresInHelperInvocationsHaveNoEffect(spv::StorageClass storageClass);
    static bool IsExplicitLayout(spv::StorageClass storageClass);
    static bool IsTerminator(spv::Op opcode);
};

// The SpirvShader class holds a parsed SPIR-V shader but also the pipeline
// state which affects code emission when passing it to SpirvEmitter.
class SpirvShader : public Spirv
{
public:
    SpirvShader(VkShaderStageFlagBits stage,
                const char *entryPointName,
                const SpirvBinary &insns,
                const vk::RenderPass *renderPass,
                uint32_t subpassIndex,
                bool robustBufferAccess);

    ~SpirvShader();

    // TODO(b/247020580): Move to SpirvRoutine
    void emitProlog(SpirvRoutine *routine) const;
    void emit(SpirvRoutine *routine, const RValue<SIMD::Int> &activeLaneMask, const RValue<SIMD::Int> &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
    void emitEpilog(SpirvRoutine *routine) const;

    bool getRobustBufferAccess() const { return robustBufferAccess; }
    OutOfBoundsBehavior getOutOfBoundsBehavior(Object::ID pointerId, const vk::PipelineLayout *pipelineLayout) const;

    // NOTE(review): no bounds check on 'index' — callers are presumably
    // expected to pass a valid input attachment index.
    vk::Format getInputAttachmentFormat(int32_t index) const { return inputAttachmentFormats[index]; }

private:
    const bool robustBufferAccess;

    std::vector<vk::Format> inputAttachmentFormats;
};

// The SpirvEmitter class translates the parsed SPIR-V shader into Reactor code.
class SpirvEmitter
{
	// Shorthand aliases for the nested types declared on Spirv.
	using Type = Spirv::Type;
	using Object = Spirv::Object;
	using Block = Spirv::Block;
	using InsnIterator = Spirv::InsnIterator;
	using Decorations = Spirv::Decorations;
	using Span = Spirv::Span;

public:
	// Translates the given entry point of the parsed shader into Reactor code
	// for the routine. This is the sole public entry point; the emitter itself
	// is constructed privately and lives only for the duration of the call.
	static void emit(const SpirvShader &shader,
	                 SpirvRoutine *routine,
	                 Spirv::Function::ID entryPoint,
	                 RValue<SIMD::Int> activeLaneMask,
	                 RValue<SIMD::Int> storesAndAtomicsMask,
	                 const vk::DescriptorSet::Bindings &descriptorSets,
	                 unsigned int multiSampleCount);

	// Helper for calling rr::Yield with result cast to an rr::Int.
	enum class YieldResult
	{
		ControlBarrier = 0,
	};

private:
	// Private: use the static emit() above.
	SpirvEmitter(const SpirvShader &shader,
	             SpirvRoutine *routine,
	             Spirv::Function::ID entryPoint,
	             RValue<SIMD::Int> activeLaneMask,
	             RValue<SIMD::Int> storesAndAtomicsMask,
	             const vk::DescriptorSet::Bindings &descriptorSets,
	             unsigned int multiSampleCount);

	// Returns the mask describing the active lanes as updated by dynamic
	// control flow. Active lanes include helper invocations, used for
	// calculating fragment derivatives, which must not perform memory
	// stores or atomic writes.
	//
	// Use activeStoresAndAtomicsMask() to consider both control flow and
	// lanes which are permitted to perform memory stores and atomic
	// operations.
	RValue<SIMD::Int> activeLaneMask() const
	{
		ASSERT(activeLaneMaskValue != nullptr);
		return RValue<SIMD::Int>(activeLaneMaskValue);
	}

	// Returns the immutable lane mask that describes which lanes are
	// permitted to perform memory stores and atomic operations.
	// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
	// consider lanes that have been made inactive due to control flow.
	RValue<SIMD::Int> storesAndAtomicsMask() const
	{
		ASSERT(storesAndAtomicsMaskValue != nullptr);
		return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
	}

	// Returns a lane mask that describes which lanes are permitted to
	// perform memory stores and atomic operations, considering lanes that
	// may have been made inactive due to control flow.
	RValue<SIMD::Int> activeStoresAndAtomicsMask() const
	{
		return activeLaneMask() & storesAndAtomicsMask();
	}

	// Add a new active lane mask edge from the current block to out.
	// The edge mask value will be (mask AND activeLaneMaskValue).
	// If multiple active lane masks are added for the same edge, then
	// they will be ORed together.
	void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);

	// Add a new active lane mask for the edge from -> to.
	// If multiple active lane masks are added for the same edge, then
	// they will be ORed together.
	void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);

	// OpImageSample variants
	enum Variant : uint32_t
	{
		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
		Dref,
		Proj,
		ProjDref,
		VARIANT_LAST = ProjDref
	};

	// Compact representation of image instruction state that is passed to the
	// trampoline function for retrieving/generating the corresponding sampling routine.
	struct ImageInstructionSignature
	{
		ImageInstructionSignature(Variant variant, SamplerMethod samplerMethod)
		{
			this->variant = variant;
			this->samplerMethod = samplerMethod;
		}

		// Unmarshal from raw 32-bit data
		explicit ImageInstructionSignature(uint32_t signature)
		    : signature(signature)
		{}

		SamplerFunction getSamplerFunction() const
		{
			return { samplerMethod, offset != 0, sample != 0 };
		}

		bool isDref() const
		{
			return (variant == Dref) || (variant == ProjDref);
		}

		bool isProj() const
		{
			return (variant == Proj) || (variant == ProjDref);
		}

		bool hasLod() const
		{
			return samplerMethod == Lod || samplerMethod == Fetch;  // We always pass a Lod operand for Fetch operations.
		}

		bool hasGrad() const
		{
			return samplerMethod == Grad;
		}

		// Anonymous union: the bitfields below alias `signature`, allowing the
		// whole state to be marshaled as a single 32-bit word. Field order and
		// widths are part of the encoding — do not reorder (see static_assert below).
		union
		{
			struct
			{
				Variant variant : BITS(VARIANT_LAST);
				SamplerMethod samplerMethod : BITS(SAMPLER_METHOD_LAST);
				uint32_t gatherComponent : 2;
				uint32_t dim : BITS(spv::DimSubpassData);  // spv::Dim
				uint32_t arrayed : 1;
				uint32_t imageFormat : BITS(spv::ImageFormatR64i);  // spv::ImageFormat

				// Parameters are passed to the sampling routine in this order:
				uint32_t coordinates : 3;  // 1-4 (does not contain projection component)
				/* uint32_t dref : 1; */   // Indicated by Variant::ProjDref|Dref
				/* uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
				uint32_t grad : 2;    // 0-3 components (for each of dx / dy)
				uint32_t offset : 2;  // 0-3 components
				uint32_t sample : 1;  // 0-1 scalar integer
			};

			uint32_t signature = 0;
		};
	};

	// This gets stored as a literal in the generated code, so it should be compact.
	static_assert(sizeof(ImageInstructionSignature) == sizeof(uint32_t), "ImageInstructionSignature must be 32-bit");

	// Full image instruction state: the compact signature plus the operand
	// object IDs parsed from the instruction. An ID of 0 means "not present".
	struct ImageInstruction : public ImageInstructionSignature
	{
		ImageInstruction(InsnIterator insn, const Spirv &shader, const SpirvEmitter &state);

		// Instruction position within the SPIR-V binary, in words.
		const uint32_t position;

		Type::ID resultTypeId = 0;
		Object::ID resultId = 0;
		Object::ID imageId = 0;
		Object::ID samplerId = 0;
		Object::ID coordinateId = 0;
		Object::ID texelId = 0;
		Object::ID drefId = 0;
		Object::ID lodOrBiasId = 0;
		Object::ID gradDxId = 0;
		Object::ID gradDyId = 0;
		Object::ID offsetId = 0;
		Object::ID sampleId = 0;

	private:
		static ImageInstructionSignature parseVariantAndMethod(InsnIterator insn);
		static uint32_t getImageOperandsIndex(InsnIterator insn);
		static uint32_t getImageOperandsMask(InsnIterator insn);
	};

	// An image pointer paired with the sampler object it was combined with
	// (OpSampledImage). The pointer part addresses the image descriptor.
	class SampledImagePointer : public SIMD::Pointer
	{
	public:
		SampledImagePointer(SIMD::Pointer image, Object::ID sampler)
		    : SIMD::Pointer(image)
		    , samplerId(sampler)
		{}
		Object::ID samplerId;
	};

	// Generic wrapper over either per-lane intermediate value, or a constant.
	// Constants are transparently widened to per-lane values in operator[].
	// This is appropriate in most cases -- if we're not going to do something
	// significantly different based on whether the value is uniform across lanes.
	class Operand
	{
	public:
		Operand(const Spirv &shader, const SpirvEmitter &state, Object::ID objectId);
		Operand(const Intermediate &value);

		RValue<SIMD::Float> Float(uint32_t i) const
		{
			if(intermediate)
			{
				return intermediate->Float(i);
			}

			// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
			// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
			// Thus we must first construct an integer constant, and bitcast to float.
			return As<SIMD::Float>(SIMD::UInt(constant[i]));
		}

		RValue<SIMD::Int> Int(uint32_t i) const
		{
			if(intermediate)
			{
				return intermediate->Int(i);
			}

			return SIMD::Int(constant[i]);
		}

		RValue<SIMD::UInt> UInt(uint32_t i) const
		{
			if(intermediate)
			{
				return intermediate->UInt(i);
			}

			return SIMD::UInt(constant[i]);
		}

		const SIMD::Pointer &Pointer() const
		{
			ASSERT(intermediate == nullptr);

			return *pointer;
		}

		bool isPointer() const
		{
			return (pointer != nullptr);
		}

		const SampledImagePointer &SampledImage() const
		{
			ASSERT(intermediate == nullptr);

			return *sampledImage;
		}

		bool isSampledImage() const
		{
			return (sampledImage != nullptr);
		}

	private:
		RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)

		// Delegate constructor
		Operand(const SpirvEmitter &state, const Object &object);

		// Exactly one of these is non-null, selecting the operand's kind.
		const uint32_t *constant = nullptr;
		const Intermediate *intermediate = nullptr;
		const SIMD::Pointer *pointer = nullptr;
		const SampledImagePointer *sampledImage = nullptr;

	public:
		const uint32_t componentCount;
	};

	RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)

	// Creates (and returns) the Intermediate for the given result id.
	// Asserts if one was already created for that id.
	Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
	{
		auto it = intermediates.emplace(std::piecewise_construct,
		                                std::forward_as_tuple(id),
		                                std::forward_as_tuple(componentCount));
		ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
		return it.first->second;
	}

	// Looks up a previously created Intermediate. Asserts if not found.
	const Intermediate &getIntermediate(Object::ID id) const
	{
		auto it = intermediates.find(id);
		ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
		return it->second;
	}

	// Registers the SIMD::Pointer for the given object id. Asserts on duplicates.
	void createPointer(Object::ID id, SIMD::Pointer ptr)
	{
		bool added = pointers.emplace(id, ptr).second;
		ASSERT_MSG(added, "Pointer %d created twice", id.value());
	}

	// Looks up a previously registered pointer. Asserts if not found.
	const SIMD::Pointer &getPointer(Object::ID id) const
	{
		auto it = pointers.find(id);
		ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
		return it->second;
	}

	// Registers the SampledImagePointer for the given object id. Asserts on duplicates.
	void createSampledImage(Object::ID id, SampledImagePointer ptr)
	{
		bool added = sampledImages.emplace(id, ptr).second;
		ASSERT_MSG(added, "Sampled image %d created twice", id.value());
	}

	// Looks up a previously registered sampled image. Asserts if not found.
	const SampledImagePointer &getSampledImage(Object::ID id) const
	{
		auto it = sampledImages.find(id);
		ASSERT_MSG(it != sampledImages.end(), "Unknown sampled image %d", id.value());
		return it->second;
	}

	bool isSampledImage(Object::ID id) const
	{
		return sampledImages.find(id) != sampledImages.end();
	}

	// Returns the image pointer for id, whether it was registered as a plain
	// pointer or as a sampled image (SampledImagePointer derives from SIMD::Pointer).
	const SIMD::Pointer &getImage(Object::ID id) const
	{
		return isSampledImage(id) ? getSampledImage(id) : getPointer(id);
	}

	// Per-opcode instruction emitters, dispatched from EmitInstruction().
	void EmitVariable(InsnIterator insn);
	void EmitLoad(InsnIterator insn);
	void EmitStore(InsnIterator insn);
	void EmitAccessChain(InsnIterator insn);
	void EmitCompositeConstruct(InsnIterator insn);
	void EmitCompositeInsert(InsnIterator insn);
	void EmitCompositeExtract(InsnIterator insn);
	void EmitVectorShuffle(InsnIterator insn);
	void EmitVectorTimesScalar(InsnIterator insn);
	void EmitMatrixTimesVector(InsnIterator insn);
	void EmitVectorTimesMatrix(InsnIterator insn);
	void EmitMatrixTimesMatrix(InsnIterator insn);
	void EmitOuterProduct(InsnIterator insn);
	void EmitTranspose(InsnIterator insn);
	void EmitVectorExtractDynamic(InsnIterator insn);
	void EmitVectorInsertDynamic(InsnIterator insn);
	void EmitUnaryOp(InsnIterator insn);
	void EmitBinaryOp(InsnIterator insn);
	void EmitDot(InsnIterator insn);
	void EmitSelect(InsnIterator insn);
	void EmitExtendedInstruction(InsnIterator insn);
	void EmitExtGLSLstd450(InsnIterator insn);
	void EmitAny(InsnIterator insn);
	void EmitAll(InsnIterator insn);
	void EmitBranch(InsnIterator insn);
	void EmitBranchConditional(InsnIterator insn);
	void EmitSwitch(InsnIterator insn);
	void EmitUnreachable(InsnIterator insn);
	void EmitReturn(InsnIterator insn);
	void EmitTerminateInvocation(InsnIterator insn);
	void EmitDemoteToHelperInvocation(InsnIterator insn);
	void EmitIsHelperInvocation(InsnIterator insn);
	void EmitFunctionCall(InsnIterator insn);
	void EmitPhi(InsnIterator insn);
	void EmitImageSample(const ImageInstruction &instruction);
	void EmitImageQuerySizeLod(InsnIterator insn);
	void EmitImageQuerySize(InsnIterator insn);
	void EmitImageQueryLevels(InsnIterator insn);
	void EmitImageQuerySamples(InsnIterator insn);
	void EmitImageRead(const ImageInstruction &instruction);
	void EmitImageWrite(const ImageInstruction &instruction);
	void EmitImageTexelPointer(const ImageInstruction &instruction);
	void EmitAtomicOp(InsnIterator insn);
	void EmitAtomicCompareExchange(InsnIterator insn);
	void EmitSampledImage(InsnIterator insn);
	void EmitImage(InsnIterator insn);
	void EmitCopyObject(InsnIterator insn);
	void EmitCopyMemory(InsnIterator insn);
	void EmitControlBarrier(InsnIterator insn);
	void EmitMemoryBarrier(InsnIterator insn);
	void EmitGroupNonUniform(InsnIterator insn);
	void EmitArrayLength(InsnIterator insn);
	void EmitBitcastPointer(Object::ID resultID, Operand &src);

	// Interpolation flavors for the InterpolateAt* extended instructions.
	enum InterpolationType
	{
		Centroid,
		AtSample,
		AtOffset,
	};
	SIMD::Float EmitInterpolate(const SIMD::Pointer &ptr, int32_t location, Object::ID paramId,
	                            uint32_t component, InterpolationType type) const;

	// Resolve an OpAccessChain to a pointer. The "ExplicitLayout" variant is
	// for storage classes with explicit offsets/strides (see Spirv::IsExplicitLayout).
	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, bool nonUniform) const;
	SIMD::Pointer WalkAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, bool nonUniform) const;

	// Returns true if data in the given storage class is word-interleaved
	// by each SIMD vector lane, otherwise data is stored linearly.
	//
	// Each lane addresses a single word, picked by a base pointer and an
	// integer offset.
	//
	// A word is currently 32 bits (single float, int32_t, uint32_t).
	// A lane is a single element of a SIMD vector register.
	//
	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
	// ---------------------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
	//
	// Assuming SIMD::Width == 4:
	//
	//                   Lane[0]   |  Lane[1]   |  Lane[2]   |  Lane[3]
	//                 ===========+===========+===========+==========
	//  LaneOffset=0:  |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=1:  |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=2:  |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=3:  |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
	//
	//
	// Linear storage - (IsStorageInterleavedByLane() == false):
	// ---------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * LaneOffset
	//
	//                   Lane[0]   |  Lane[1]   |  Lane[2]   |  Lane[3]
	//                 ===========+===========+===========+==========
	//  LaneOffset=0:  |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=1:  |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=2:  |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=3:  |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
	//

	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
	static SIMD::Pointer GetElementPointer(SIMD::Pointer structure, uint32_t offset, spv::StorageClass storageClass);

	// Returns a SIMD::Pointer to the underlying data for the given pointer
	// object.
	// Handles objects of the following kinds:
	//  - DescriptorSet
	//  - Pointer
	//  - InterfaceVariable
	// Calling GetPointerToData with objects of any other kind will assert.
	SIMD::Pointer GetPointerToData(Object::ID id, SIMD::Int arrayIndex, bool nonUniform) const;
	void OffsetToElement(SIMD::Pointer &ptr, Object::ID elementId, int32_t arrayStride) const;

	/* image instructions */

	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction) const;

	Pointer<Byte> getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const;
	Pointer<Byte> getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, int laneIdx) const;
	Pointer<Byte> lookupSamplerFunction(Pointer<Byte> imageDescriptor, Pointer<Byte> samplerDescriptor, const ImageInstruction &instruction) const;
	void callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const;

	void GetImageDimensions(const Type &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;

	// Intermediate state for computing texel addresses from coordinates.
	struct TexelAddressData
	{
		bool isArrayed;
		spv::Dim dim;
		int dims, texelSize;
		SIMD::Int u, v, w, ptrOffset;
	};
	static TexelAddressData setupTexelAddressData(SIMD::Int rowPitch, SIMD::Int slicePitch, SIMD::Int samplePitch, ImageInstructionSignature instruction, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, const SpirvRoutine *routine);
	static SIMD::Pointer GetNonUniformTexelAddress(ImageInstructionSignature instruction, SIMD::Pointer descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, SIMD::Int activeLaneMask, const SpirvRoutine *routine);
	static SIMD::Pointer GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const SpirvRoutine *routine);
	static void WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat);

	/* control flow */

	// Lookup the active lane mask for the edge from -> to.
	// If from is unreachable, then a mask of all zeros is returned.
	// Asserts if from is reachable and the edge does not exist.
	RValue<SIMD::Int> GetActiveLaneMaskEdge(Block::ID from, Block::ID to) const;

	// Updates the current active lane mask.
	void SetActiveLaneMask(RValue<SIMD::Int> mask);
	void SetStoresAndAtomicsMask(RValue<SIMD::Int> mask);

	// Emit all the unvisited blocks (except for ignore) in DFS order,
	// starting with id.
	void EmitBlocks(Block::ID id, Block::ID ignore = 0);
	void EmitNonLoop();
	void EmitLoop();

	void EmitInstructions(InsnIterator begin, InsnIterator end);
	void EmitInstruction(InsnIterator insn);

	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
	// can also store independent operands.
	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder) const;

	// LoadPhi loads the phi values from the alloca storage and places the
	// load values into the intermediate with the phi's result id.
	void LoadPhi(InsnIterator insn);

	// StorePhi updates the phi's alloca storage value using the incoming
	// values from blocks that are both in the OpPhi instruction and in
	// filter.
	void StorePhi(Block::ID blockID, InsnIterator insn, const std::unordered_set<Block::ID> &filter);

	// Emits a rr::Fence for the given MemorySemanticsMask.
	void Fence(spv::MemorySemanticsMask semantics) const;

	void Yield(YieldResult res) const;

	// Helper as we often need to take dot products as part of doing other things.
	static SIMD::Float FDot(unsigned numComponents, const Operand &x, const Operand &y);
	static SIMD::Int SDot(unsigned numComponents, const Operand &x, const Operand &y, const Operand *accum);
	static SIMD::UInt UDot(unsigned numComponents, const Operand &x, const Operand &y, const Operand *accum);
	static SIMD::Int SUDot(unsigned numComponents, const Operand &x, const Operand &y, const Operand *accum);
	static SIMD::Int AddSat(RValue<SIMD::Int> a, RValue<SIMD::Int> b);
	static SIMD::UInt AddSat(RValue<SIMD::UInt> a, RValue<SIMD::UInt> b);

	// Signature of the generated per-instruction sampling routines.
	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);
	static ImageSampler *getImageSampler(const vk::Device *device, uint32_t signature, uint32_t samplerId, uint32_t imageViewId);
	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
	static std::shared_ptr<rr::Routine> emitWriteRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);

	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
	static sw::FilterType convertFilterMode(const vk::SamplerState *samplerState, VkImageViewType imageViewType, SamplerMethod samplerMethod);
	static sw::MipmapType convertMipmapMode(const vk::SamplerState *samplerState);
	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::SamplerState *samplerState, VkImageViewType imageViewType);

	const SpirvShader &shader;
	SpirvRoutine *const routine;                     // The current routine being built.
	Spirv::Function::ID function;                    // The current function being built.
	Block::ID block;                                 // The current block being built.
	rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
	rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
	Spirv::Block::Set visited;                       // Blocks already built.
	std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
	std::deque<Block::ID> *pending;  // Worklist of blocks awaiting emission. NOTE(review): presumably owned by the active EmitBlocks() call — confirm.

	const vk::DescriptorSet::Bindings &descriptorSets;

	// Per-object emission state, keyed by SPIR-V result id.
	std::unordered_map<Object::ID, Intermediate> intermediates;
	std::unordered_map<Object::ID, std::vector<SIMD::Float>> phis;
	std::unordered_map<Object::ID, SIMD::Pointer> pointers;
	std::unordered_map<Object::ID, SampledImagePointer> sampledImages;

	const unsigned int multiSampleCount;
};

// SpirvRoutine holds the per-invocation state (inputs, outputs, builtins,
// descriptor pointers, variables) referenced by the code that SpirvEmitter
// generates.
class SpirvRoutine
{
	using Object = Spirv::Object;

public:
	SpirvRoutine(const vk::PipelineLayout *pipelineLayout);

	using Variable = Array<SIMD::Float>;

	// Single-entry 'inline' sampler routine cache.
	struct SamplerCache
	{
		Pointer<Byte> imageDescriptor = nullptr;
		Int samplerId;

		Pointer<Byte> function;
	};

	enum Interpolation
	{
		Perspective = 0,
		Linear,
		Flat,
	};

	// Plane-equation data used to interpolate fragment inputs.
	struct InterpolationData
	{
		Pointer<Byte> primitive;
		SIMD::Float x;
		SIMD::Float y;
		SIMD::Float rhw;
		SIMD::Float xCentroid;
		SIMD::Float yCentroid;
		SIMD::Float rhwCentroid;
	};

	const vk::PipelineLayout *const pipelineLayout;

	std::unordered_map<Object::ID, Variable> variables;
	std::unordered_map<uint32_t, SamplerCache> samplerCache;  // Indexed by the instruction position, in words.
	SIMD::Float inputs[MAX_INTERFACE_COMPONENTS];
	Interpolation inputsInterpolation[MAX_INTERFACE_COMPONENTS];
	SIMD::Float outputs[MAX_INTERFACE_COMPONENTS];
	InterpolationData interpolationData;

	Pointer<Byte> device;
	Pointer<Byte> workgroupMemory;
	Pointer<Pointer<Byte>> descriptorSets;
	Pointer<Int> descriptorDynamicOffsets;
	Pointer<Byte> pushConstants;
	Pointer<Byte> constants;
	Int discardMask = 0;

	// Shader invocation state.
	// Not all of these variables are used for every type of shader, and some
	// are only used when debugging. See b/146486064 for more information.
	// Give careful consideration to the runtime performance loss before adding
	// more state here.
	std::array<SIMD::Int, 2> windowSpacePosition;  // TODO(b/236162233): SIMD::Int2
	Int layer;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
	Int instanceID;
	SIMD::Int vertexIndex;
	std::array<SIMD::Float, 4> fragCoord;   // TODO(b/236162233): SIMD::Float4
	std::array<SIMD::Float, 2> pointCoord;  // TODO(b/236162233): SIMD::Float2
	SIMD::Int helperInvocation;
	Int4 numWorkgroups;
	Int4 workgroupID;
	Int4 workgroupSize;
	Int subgroupsPerWorkgroup;
	Int invocationsPerSubgroup;
	Int subgroupIndex;
	SIMD::Int localInvocationIndex;
	std::array<SIMD::Int, 3> localInvocationID;   // TODO(b/236162233): SIMD::Int3
	std::array<SIMD::Int, 3> globalInvocationID;  // TODO(b/236162233): SIMD::Int3

	// Allocates storage for an OpVariable's value. Asserts on duplicate ids.
	void createVariable(Object::ID id, uint32_t componentCount)
	{
		bool added = variables.emplace(id, Variable(componentCount)).second;
		ASSERT_MSG(added, "Variable %d created twice", id.value());
	}

	// Looks up a previously created variable. Asserts if not found.
	Variable &getVariable(Object::ID id)
	{
		auto it = variables.find(id);
		ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value());
		return it->second;
	}

	// setImmutableInputBuiltins() sets all the immutable input builtins,
	// common for all shader types.
	void setImmutableInputBuiltins(const SpirvShader *shader);

	static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, Interpolation interpolation);

	// setInputBuiltin() calls f() with the builtin and value if the shader
	// uses the input builtin, otherwise the call is a no-op.
	// F is a function with the signature:
	// void(const Spirv::BuiltinMapping& builtin, Array<SIMD::Float>& value)
	template<typename F>
	inline void setInputBuiltin(const SpirvShader *shader, spv::BuiltIn id, F &&f)
	{
		auto it = shader->inputBuiltins.find(id);
		if(it != shader->inputBuiltins.end())
		{
			const auto &builtin = it->second;
			f(builtin, getVariable(builtin.Id));
		}
	}
};

}  // namespace sw

#endif  // sw_SpirvShader_hpp