// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef sw_SpirvShader_hpp
#define sw_SpirvShader_hpp

#include "SamplerCore.hpp"
#include "ShaderCore.hpp"
#include "SpirvBinary.hpp"
#include "SpirvID.hpp"
#include "Device/Config.hpp"
#include "Device/Sampler.hpp"
#include "System/Debug.hpp"
#include "System/Math.hpp"
#include "System/Types.hpp"
#include "Vulkan/VkConfig.hpp"
#include "Vulkan/VkDescriptorSet.hpp"

#define SPV_ENABLE_UTILITY_CODE
#include <spirv/unified1/spirv.hpp>

#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <deque>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#undef Yield  // b/127920555

namespace vk {

class Device;
class PipelineLayout;
class ImageView;
class Sampler;
class RenderPass;
struct SampledImageDescriptor;
struct SamplerState;

namespace dbg {
class Context;
}  // namespace dbg

}  // namespace vk

namespace sw {

// Forward declarations.
class SpirvRoutine;

// Incrementally constructed complex bundle of rvalues.
// Effectively a restricted vector, supporting only:
// - allocation to a (runtime-known) fixed component count
// - in-place construction of elements
// - const operator[]
class Intermediate
{
public:
	Intermediate(uint32_t componentCount)
	    : componentCount(componentCount)
	    , scalar(new rr::Value *[componentCount])
	{
		for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; }
	}

	~Intermediate()
	{
		delete[] scalar;
	}

	// TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to
	// decide the format used to print the intermediate data.
	enum class TypeHint
	{
		Float,
		Int,
		UInt
	};

	void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); }
	void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); }
	void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); }

	void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); }
	void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); }
	void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
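
	// Illustrative usage (not part of the API contract): each component is
	// written exactly once via move(), and later read back with the getter
	// matching the type it was written as. For example:
	//
	//   Intermediate dst(4);                   // four-component result
	//   dst.move(0, SIMD::Float(1.0f));        // in-place construction of component 0
	//   RValue<SIMD::Float> x = dst.Float(0);  // typed retrieval of component 0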

	// Value retrieval functions.
	RValue<SIMD::Float> Float(uint32_t i) const
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] != nullptr);
		return As<SIMD::Float>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Float>(scalar)
	}

	RValue<SIMD::Int> Int(uint32_t i) const
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] != nullptr);
		return As<SIMD::Int>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Int>(scalar)
	}

	RValue<SIMD::UInt> UInt(uint32_t i) const
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] != nullptr);
		return As<SIMD::UInt>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
	}

	// No copy/move construction or assignment
	Intermediate(Intermediate const &) = delete;
	Intermediate(Intermediate &&) = delete;
	Intermediate &operator=(Intermediate const &) = delete;
	Intermediate &operator=(Intermediate &&) = delete;

	const uint32_t componentCount;

private:
	void emplace(uint32_t i, rr::Value *value, TypeHint type)
	{
		ASSERT(i < componentCount);
		ASSERT(scalar[i] == nullptr);
		scalar[i] = value;
		RR_PRINT_ONLY(typeHint = type;)
	}

	rr::Value **const scalar;

#ifdef ENABLE_RR_PRINT
	friend struct rr::PrintValue::Ty<sw::Intermediate>;
	TypeHint typeHint = TypeHint::Float;
#endif  // ENABLE_RR_PRINT
};

class SpirvShader
{
public:
	SpirvBinary insns;

	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);

	enum class YieldResult
	{
		ControlBarrier,
	};

	class Type;
	class Object;

	// Pseudo-iterator over SPIR-V instructions, designed to support range-based-for.
	class InsnIterator
	{
	public:
		InsnIterator(InsnIterator const &other) = default;

		InsnIterator() = default;

		explicit InsnIterator(SpirvBinary::const_iterator iter)
		    : iter{ iter }
		{
		}

		spv::Op opcode() const
		{
			return static_cast<spv::Op>(*iter & spv::OpCodeMask);
		}

		uint32_t wordCount() const
		{
			return *iter >> spv::WordCountShift;
		}

		uint32_t word(uint32_t n) const
		{
			ASSERT(n < wordCount());
			return iter[n];
		}

		uint32_t const *wordPointer(uint32_t n) const
		{
			return &iter[n];
		}

		const char *string(uint32_t n) const
		{
			return reinterpret_cast<const char *>(wordPointer(n));
		}

		// Returns the number of whole words that a string literal starting at
		// word n consumes. If the end of the instruction is reached before the
		// null terminator is found, then the function DABORT()s and 0 is
		// returned.
		uint32_t stringSizeInWords(uint32_t n) const
		{
			uint32_t c = wordCount();
			for(uint32_t i = n; i < c; i++)
			{
				auto *u32 = wordPointer(i);
				auto *u8 = reinterpret_cast<const uint8_t *>(u32);
				// SPIR-V spec 2.2.1. Instructions:
				// A string is interpreted as a nul-terminated stream of
				// characters. The character set is Unicode in the UTF-8
				// encoding scheme. The UTF-8 octets (8-bit bytes) are packed
				// four per word, following the little-endian convention (i.e.,
				// the first octet is in the lowest-order 8 bits of the word).
				// The final word contains the string's nul-termination
				// character (0), and all contents past the end of the string in
				// the final word are padded with 0.
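				// For example, the 12-character literal "GLSL.std.450" plus its
				// nul terminator occupies 13 octets and therefore consumes 4 words
				// (illustrative only; the result is 1 + terminating-word-index - n).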
				if(u8[3] == 0)
				{
					return 1 + i - n;
				}
			}
			DABORT("SPIR-V string literal was not null-terminated");
			return 0;
		}

		bool hasResultAndType() const
		{
			bool hasResult = false, hasResultType = false;
			spv::HasResultAndType(opcode(), &hasResult, &hasResultType);

			return hasResultType;
		}

		SpirvID<Type> resultTypeId() const
		{
			ASSERT(hasResultAndType());
			return word(1);
		}

		SpirvID<Object> resultId() const
		{
			ASSERT(hasResultAndType());
			return word(2);
		}

		uint32_t distanceFrom(const InsnIterator &other) const
		{
			return static_cast<uint32_t>(iter - other.iter);
		}

		bool operator==(InsnIterator const &other) const
		{
			return iter == other.iter;
		}

		bool operator!=(InsnIterator const &other) const
		{
			return iter != other.iter;
		}

		InsnIterator operator*() const
		{
			return *this;
		}

		InsnIterator &operator++()
		{
			iter += wordCount();
			return *this;
		}

		InsnIterator const operator++(int)
		{
			InsnIterator ret{ *this };
			iter += wordCount();
			return ret;
		}

	private:
		SpirvBinary::const_iterator iter;
	};

	// Range-based-for interface
	InsnIterator begin() const
	{
		// Skip over the header words
		return InsnIterator{ insns.cbegin() + 5 };
	}

	InsnIterator end() const
	{
		return InsnIterator{ insns.cend() };
	}

	class Type
	{
	public:
		using ID = SpirvID<Type>;

		spv::Op opcode() const { return definition.opcode(); }

		InsnIterator definition;
		spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
		uint32_t componentCount = 0;
		bool isBuiltInBlock = false;

		// Inner element type for pointers, arrays, vectors and matrices.
		ID element;
	};

	class Object
	{
	public:
		using ID = SpirvID<Object>;

		spv::Op opcode() const { return definition.opcode(); }
		Type::ID typeId() const { return definition.resultTypeId(); }
		Object::ID id() const { return definition.resultId(); }

		bool isConstantZero() const;

		InsnIterator definition;
		std::vector<uint32_t> constantValue;

		enum class Kind
		{
			// Invalid default kind.
			// If we get left with an object in this state, the module was
			// broken.
			Unknown,

			// TODO: Better document this kind.
			// A shader interface variable pointer.
			// Pointer with uniform address across all lanes.
			// Pointer held by SpirvRoutine::pointers
			InterfaceVariable,

			// Constant value held by Object::constantValue.
			Constant,

			// Value held by SpirvRoutine::intermediates.
			Intermediate,

			// Pointer held by SpirvRoutine::pointers
			Pointer,

			// A pointer to a vk::DescriptorSet*.
			// Pointer held by SpirvRoutine::pointers.
			DescriptorSet,
		};

		Kind kind = Kind::Unknown;
	};

	// Block is an interval of SPIR-V instructions, starting with the
	// opening OpLabel, and ending with a termination instruction.
	class Block
	{
	public:
		using ID = SpirvID<Block>;
		using Set = std::unordered_set<ID>;

		// Edge represents the graph edge between two blocks.
		struct Edge
		{
			ID from;
			ID to;

			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }

			struct Hash
			{
				std::size_t operator()(const Edge &edge) const noexcept
				{
					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
				}
			};
		};

		Block() = default;
		Block(const Block &other) = default;
		explicit Block(InsnIterator begin, InsnIterator end);

		/* range-based-for interface */
		inline InsnIterator begin() const { return begin_; }
		inline InsnIterator end() const { return end_; }

		enum Kind
		{
			Simple,                         // OpBranch or other simple terminator.
			StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
			UnstructuredBranchConditional,  // OpBranchConditional
			StructuredSwitch,               // OpSelectionMerge + OpSwitch
			UnstructuredSwitch,             // OpSwitch
			Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
		};

		Kind kind = Simple;
		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
		InsnIterator branchInstruction;  // Branch instruction.
		ID mergeBlock;                   // Structured flow merge block.
		ID continueTarget;               // Loop continue block.
		Set ins;                         // Blocks that branch into this block.
		Set outs;                        // Blocks that this block branches to.
		bool isLoopMerge = false;

	private:
		InsnIterator begin_;
		InsnIterator end_;
	};

	class Function
	{
	public:
		using ID = SpirvID<Function>;

		// Walks all the blocks reachable from id, adding them to reachable.
		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;

		// AssignBlockFields() performs the following for all reachable blocks:
		// * Assigns Block::ins with the identifiers of all blocks that contain
		//   this block in their Block::outs.
		// * Sets Block::isLoopMerge to true if the block is the merge block of
		//   another loop block.
		void AssignBlockFields();

		// ForeachBlockDependency calls f with each dependency of the given
		// block. A dependency is an incoming block that is not a loop-back
		// edge.
		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;

		// ExistsPath returns true if there's a direct or indirect flow from
		// the 'from' block to the 'to' block that does not pass through
		// notPassingThrough.
		bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;

		Block const &getBlock(Block::ID id) const
		{
			auto it = blocks.find(id);
			ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
			return it->second;
		}

		Block::ID entry;          // function entry point block.
		HandleMap<Block> blocks;  // blocks belonging to this function.
		Type::ID type;            // type of the function.
		Type::ID result;          // return type.
	};

	using String = std::string;
	using StringID = SpirvID<std::string>;

	class Extension
	{
	public:
		using ID = SpirvID<Extension>;

		enum Name
		{
			Unknown,
			GLSLstd450,
			OpenCLDebugInfo100
		};

		Name name;
	};

	struct TypeOrObject
	{};

	// TypeOrObjectID is an identifier that represents a Type or an Object,
	// and supports implicit casting to and from Type::ID or Object::ID.
	class TypeOrObjectID : public SpirvID<TypeOrObject>
	{
	public:
		using Hash = std::hash<SpirvID<TypeOrObject>>;

		inline TypeOrObjectID(uint32_t id)
		    : SpirvID(id)
		{}
		inline TypeOrObjectID(Type::ID id)
		    : SpirvID(id.value())
		{}
		inline TypeOrObjectID(Object::ID id)
		    : SpirvID(id.value())
		{}
		inline operator Type::ID() const { return Type::ID(value()); }
		inline operator Object::ID() const { return Object::ID(value()); }
	};

	// OpImageSample variants
	enum Variant : uint32_t
	{
		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
		Dref,
		Proj,
		ProjDref,
		VARIANT_LAST = ProjDref
	};

	// Compact representation of image instruction state that is passed to the
	// trampoline function for retrieving/generating the corresponding sampling routine.
	struct ImageInstructionSignature
	{
		ImageInstructionSignature(Variant variant, SamplerMethod samplerMethod)
		{
			this->variant = variant;
			this->samplerMethod = samplerMethod;
		}

		// Unmarshal from raw 32-bit data
		explicit ImageInstructionSignature(uint32_t signature)
		    : signature(signature)
		{}

		SamplerFunction getSamplerFunction() const
		{
			return { samplerMethod, offset != 0, sample != 0 };
		}

		bool isDref() const
		{
			return (variant == Dref) || (variant == ProjDref);
		}

		bool isProj() const
		{
			return (variant == Proj) || (variant == ProjDref);
		}

		bool hasLod() const
		{
			return samplerMethod == Lod || samplerMethod == Fetch;  // We always pass a Lod operand for Fetch operations.
		}

		bool hasGrad() const
		{
			return samplerMethod == Grad;
		}

		union
		{
			struct
			{
				Variant variant : BITS(VARIANT_LAST);
				SamplerMethod samplerMethod : BITS(SAMPLER_METHOD_LAST);
				uint32_t gatherComponent : 2;
				uint32_t dim : BITS(spv::DimSubpassData);  // spv::Dim
				uint32_t arrayed : 1;
				uint32_t imageFormat : BITS(spv::ImageFormatR64i);  // spv::ImageFormat

				// Parameters are passed to the sampling routine in this order:
				uint32_t coordinates : 3;      // 1-4 (does not contain projection component)
				/* uint32_t dref : 1; */       // Indicated by Variant::ProjDref|Dref
				/* uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
				uint32_t grad : 2;             // 0-3 components (for each of dx / dy)
				uint32_t offset : 2;           // 0-3 components
				uint32_t sample : 1;           // 0-1 scalar integer
			};

			uint32_t signature = 0;
		};
	};

	// This gets stored as a literal in the generated code, so it should be compact.
	static_assert(sizeof(ImageInstructionSignature) == sizeof(uint32_t), "ImageInstructionSignature must be 32-bit");

	struct ImageInstruction : public ImageInstructionSignature
	{
		ImageInstruction(InsnIterator insn, const SpirvShader &spirv);

		const uint32_t position;

		Type::ID resultTypeId = 0;
		Object::ID resultId = 0;
		Object::ID imageId = 0;
		Object::ID samplerId = 0;
		Object::ID coordinateId = 0;
		Object::ID texelId = 0;
		Object::ID drefId = 0;
		Object::ID lodOrBiasId = 0;
		Object::ID gradDxId = 0;
		Object::ID gradDyId = 0;
		Object::ID offsetId = 0;
		Object::ID sampleId = 0;

	private:
		static ImageInstructionSignature parseVariantAndMethod(InsnIterator insn);
		static uint32_t getImageOperandsIndex(InsnIterator insn);
		static uint32_t getImageOperandsMask(InsnIterator insn);
	};

	// This method is for retrieving an ID that uniquely identifies the
	// shader entry point represented by this object.
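	// The returned value packs the entry point's SPIR-V result ID into the
	// upper 32 bits and the binary's own identifier into the lower 32 bits.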
	uint64_t getIdentifier() const
	{
		return ((uint64_t)entryPoint.value() << 32) | insns.getIdentifier();
	}

	SpirvShader(VkShaderStageFlagBits stage,
	            const char *entryPointName,
	            SpirvBinary const &insns,
	            const vk::RenderPass *renderPass,
	            uint32_t subpassIndex,
	            bool robustBufferAccess,
	            const std::shared_ptr<vk::dbg::Context> &dbgctx);

	~SpirvShader();

	struct ExecutionModes
	{
		bool EarlyFragmentTests : 1;
		bool DepthReplacing : 1;
		bool DepthGreater : 1;
		bool DepthLess : 1;
		bool DepthUnchanged : 1;

		// Compute workgroup dimensions
		int WorkgroupSizeX = 1;
		int WorkgroupSizeY = 1;
		int WorkgroupSizeZ = 1;
	};

	const ExecutionModes &getExecutionModes() const
	{
		return executionModes;
	}

	struct Analysis
	{
		bool ContainsKill : 1;
		bool ContainsControlBarriers : 1;
		bool NeedsCentroid : 1;
		bool ContainsSampleQualifier : 1;
	};

	const Analysis &getAnalysis() const
	{
		return analysis;
	}

	struct Capabilities
	{
		bool Matrix : 1;
		bool Shader : 1;
		bool StorageImageMultisample : 1;
		bool ClipDistance : 1;
		bool CullDistance : 1;
		bool ImageCubeArray : 1;
		bool SampleRateShading : 1;
		bool InputAttachment : 1;
		bool Sampled1D : 1;
		bool Image1D : 1;
		bool SampledBuffer : 1;
		bool SampledCubeArray : 1;
		bool ImageBuffer : 1;
		bool ImageMSArray : 1;
		bool StorageImageExtendedFormats : 1;
		bool ImageQuery : 1;
		bool DerivativeControl : 1;
		bool InterpolationFunction : 1;
		bool StorageImageWriteWithoutFormat : 1;
		bool GroupNonUniform : 1;
		bool GroupNonUniformVote : 1;
		bool GroupNonUniformBallot : 1;
		bool GroupNonUniformShuffle : 1;
		bool GroupNonUniformShuffleRelative : 1;
		bool GroupNonUniformArithmetic : 1;
		bool DeviceGroup : 1;
		bool MultiView : 1;
		bool StencilExportEXT : 1;
	};

	const Capabilities &getUsedCapabilities() const
	{
		return capabilities;
	}

	// getNumOutputClipDistances() returns the number of ClipDistances
	// outputted by this shader.
	unsigned int getNumOutputClipDistances() const
	{
		if(getUsedCapabilities().ClipDistance)
		{
			auto it = outputBuiltins.find(spv::BuiltInClipDistance);
			if(it != outputBuiltins.end())
			{
				return it->second.SizeInComponents;
			}
		}
		return 0;
	}

	// getNumOutputCullDistances() returns the number of CullDistances
	// outputted by this shader.
	unsigned int getNumOutputCullDistances() const
	{
		if(getUsedCapabilities().CullDistance)
		{
			auto it = outputBuiltins.find(spv::BuiltInCullDistance);
			if(it != outputBuiltins.end())
			{
				return it->second.SizeInComponents;
			}
		}
		return 0;
	}

	enum AttribType : unsigned char
	{
		ATTRIBTYPE_FLOAT,
		ATTRIBTYPE_INT,
		ATTRIBTYPE_UINT,
		ATTRIBTYPE_UNUSED,

		ATTRIBTYPE_LAST = ATTRIBTYPE_UINT
	};

	bool hasBuiltinInput(spv::BuiltIn b) const
	{
		return inputBuiltins.find(b) != inputBuiltins.end();
	}

	bool hasBuiltinOutput(spv::BuiltIn b) const
	{
		return outputBuiltins.find(b) != outputBuiltins.end();
	}

	struct Decorations
	{
		int32_t Location = -1;
		int32_t Component = 0;
		spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1);
		int32_t Offset = -1;
		int32_t ArrayStride = -1;
		int32_t MatrixStride = -1;

		bool HasLocation : 1;
		bool HasComponent : 1;
		bool HasBuiltIn : 1;
		bool HasOffset : 1;
		bool HasArrayStride : 1;
		bool HasMatrixStride : 1;
		bool HasRowMajor : 1;  // whether RowMajor bit is valid.

		bool Flat : 1;
		bool Centroid : 1;
		bool NoPerspective : 1;
		bool Block : 1;
		bool BufferBlock : 1;
		bool RelaxedPrecision : 1;
		bool RowMajor : 1;      // RowMajor if true; ColMajor if false
		bool InsideMatrix : 1;  // pseudo-decoration for whether we're inside a matrix.

		Decorations()
		    : Location{ -1 }
		    , Component{ 0 }
		    , BuiltIn{ static_cast<spv::BuiltIn>(-1) }
		    , Offset{ -1 }
		    , ArrayStride{ -1 }
		    , MatrixStride{ -1 }
		    , HasLocation{ false }
		    , HasComponent{ false }
		    , HasBuiltIn{ false }
		    , HasOffset{ false }
		    , HasArrayStride{ false }
		    , HasMatrixStride{ false }
		    , HasRowMajor{ false }
		    , Flat{ false }
		    , Centroid{ false }
		    , NoPerspective{ false }
		    , Block{ false }
		    , BufferBlock{ false }
		    , RelaxedPrecision{ false }
		    , RowMajor{ false }
		    , InsideMatrix{ false }
		{
		}

		Decorations(Decorations const &) = default;

		void Apply(Decorations const &src);

		void Apply(spv::Decoration decoration, uint32_t arg);
	};

	std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations;
	std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations;

	struct DescriptorDecorations
	{
		int32_t DescriptorSet = -1;
		int32_t Binding = -1;
		int32_t InputAttachmentIndex = -1;

		void Apply(DescriptorDecorations const &src);
	};

	std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations;
	std::vector<vk::Format> inputAttachmentFormats;

	struct InterfaceComponent
	{
		AttribType Type;

		union
		{
			struct
			{
				bool Flat : 1;
				bool Centroid : 1;
				bool NoPerspective : 1;
			};

			uint8_t DecorationBits;
		};

		InterfaceComponent()
		    : Type{ ATTRIBTYPE_UNUSED }
		    , DecorationBits{ 0 }
		{
		}
	};

	struct BuiltinMapping
	{
		Object::ID Id;
		uint32_t FirstComponent;
		uint32_t SizeInComponents;
	};

	struct WorkgroupMemory
	{
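		// Note (illustrative): allocations are appended sequentially from offset 0,
		// so allocating 64 bytes and then 16 bytes places the second variable at
		// offset 64 and makes size() report 80 bytes in total.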
		// allocates a new variable of size bytes with the given identifier.
		inline void allocate(Object::ID id, uint32_t size)
		{
			uint32_t offset = totalSize;
			auto it = offsets.emplace(id, offset);
			ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
			totalSize += size;
		}
		// returns the byte offset of the variable with the given identifier.
		inline uint32_t offsetOf(Object::ID id) const
		{
			auto it = offsets.find(id);
			ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
			return it->second;
		}
		// returns the total allocated size in bytes.
		inline uint32_t size() const { return totalSize; }

	private:
		uint32_t totalSize = 0;                            // in bytes
		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
	};

	std::vector<InterfaceComponent> inputs;
	std::vector<InterfaceComponent> outputs;

	void emitProlog(SpirvRoutine *routine) const;
	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
	void emitEpilog(SpirvRoutine *routine) const;
	void clearPhis(SpirvRoutine *routine) const;

	bool containsImageWrite() const { return imageWriteEmitted; }

	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
	WorkgroupMemory workgroupMemory;

private:
	const bool robustBufferAccess;

	Function::ID entryPoint;
	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.

	ExecutionModes executionModes = {};
	Analysis analysis = {};
	Capabilities capabilities = {};
	HandleMap<Type> types;
	HandleMap<Object> defs;
	HandleMap<Function> functions;
	std::unordered_map<StringID, String> strings;
	HandleMap<Extension> extensionsByID;
	std::unordered_set<uint32_t> extensionsImported;
	mutable bool imageWriteEmitted = false;

	// DeclareType creates a Type for the given OpTypeX instruction, storing
	// it into the types map. It is called from the analysis pass (constructor).
	void DeclareType(InsnIterator insn);

	void ProcessExecutionMode(InsnIterator it);

	uint32_t ComputeTypeSize(InsnIterator insn);
	void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
	void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
	void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const;

	// Creates an Object for the instruction's result in 'defs'.
	void DefineResult(const InsnIterator &insn);

	// Processes the OpenCL.Debug.100 instruction for the initial definition
	// pass of the SPIR-V.
	void DefineOpenCLDebugInfo100(const InsnIterator &insn);

	// Returns true if data in the given storage class is word-interleaved
	// by each SIMD vector lane, otherwise data is stored linearly.
	//
	// Each lane addresses a single word, picked by a base pointer and an
	// integer offset.
	//
	// A word is currently 32 bits (single float, int32_t, uint32_t).
	// A lane is a single element of a SIMD vector register.
	//
	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
	// ---------------------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
	//
	// Assuming SIMD::Width == 4:
	//
	//                   Lane[0]   |  Lane[1]  |  Lane[2]  |  Lane[3]
	//                 ===========+===========+===========+==========
	//  LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
	//
	//
	// Linear storage - (IsStorageInterleavedByLane() == false):
	// ---------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * LaneOffset
	//
	//                   Lane[0]   |  Lane[1]  |  Lane[2]  |  Lane[3]
	//                 ===========+===========+===========+==========
	//  LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
	//
	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
	static bool IsExplicitLayout(spv::StorageClass storageClass);

	static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p);

	// Output storage buffers and images should not be affected by helper invocations
	static bool StoresInHelperInvocation(spv::StorageClass storageClass);

	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;

	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;

	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;

	// MemoryElement describes a scalar element within a structure, and is
	// used by the callback function of VisitMemoryObject().
	struct MemoryElement
	{
		uint32_t index;    // index of the scalar element
		uint32_t offset;   // offset (in bytes) from the base of the object
		const Type &type;  // element type
	};

	using MemoryVisitor = std::function<void(const MemoryElement &)>;

	// VisitMemoryObject() walks a type tree in an explicitly laid out
	// storage class, calling the MemoryVisitor for each scalar element
	// within the object.
	void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;

	// VisitMemoryObjectInner() is internally called by VisitMemoryObject().
	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;

	Object &CreateConstant(InsnIterator it);

	void ProcessInterfaceVariable(Object &object);

	// EmitState holds control-flow state for the emit() pass.
	class EmitState
	{
	public:
		EmitState(SpirvRoutine *routine,
		          Function::ID function,
		          RValue<SIMD::Int> activeLaneMask,
		          RValue<SIMD::Int> storesAndAtomicsMask,
		          const vk::DescriptorSet::Bindings &descriptorSets,
		          bool robustBufferAccess,
		          unsigned int multiSampleCount,
		          spv::ExecutionModel executionModel)
		    : routine(routine)
		    , function(function)
		    , activeLaneMaskValue(activeLaneMask.value())
		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
		    , descriptorSets(descriptorSets)
		    , robustBufferAccess(robustBufferAccess)
		    , multiSampleCount(multiSampleCount)
		    , executionModel(executionModel)
		{
			ASSERT(executionModel != spv::ExecutionModelMax);  // Must parse OpEntryPoint before emitting.
		}

		// Returns the mask describing the active lanes as updated by dynamic
		// control flow. Active lanes include helper invocations, used for
		// calculating fragment derivatives, which must not perform memory
		// stores or atomic writes.
		//
		// Use activeStoresAndAtomicsMask() to consider both control flow and
		// lanes which are permitted to perform memory stores and atomic
		// operations.
		RValue<SIMD::Int> activeLaneMask() const
		{
			ASSERT(activeLaneMaskValue != nullptr);
			return RValue<SIMD::Int>(activeLaneMaskValue);
		}

		// Returns the immutable lane mask that describes which lanes are
		// permitted to perform memory stores and atomic operations.
		// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
		// consider lanes that have been made inactive due to control flow.
		RValue<SIMD::Int> storesAndAtomicsMask() const
		{
			ASSERT(storesAndAtomicsMaskValue != nullptr);
			return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
		}

		// Returns a lane mask that describes which lanes are permitted to
		// perform memory stores and atomic operations, considering lanes that
		// may have been made inactive due to control flow.
		RValue<SIMD::Int> activeStoresAndAtomicsMask() const
		{
			return activeLaneMask() & storesAndAtomicsMask();
		}

		// Add a new active lane mask edge from the current block to out.
		// The edge mask value will be (mask AND activeLaneMaskValue).
		// If multiple active lane masks are added for the same edge, then
		// they will be ORed together.
		void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);

		// Add a new active lane mask for the edge from -> to.
		// If multiple active lane masks are added for the same edge, then
		// they will be ORed together.
		void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);

		SpirvRoutine *routine = nullptr;                 // The current routine being built.
		Function::ID function;                           // The current function being built.
		Block::ID block;                                 // The current block being built.
		rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
		rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
		Block::Set visited;                              // Blocks already built.
		std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
		std::deque<Block::ID> *pending;

		const vk::DescriptorSet::Bindings &descriptorSets;

		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;

		unsigned int getMultiSampleCount() const { return multiSampleCount; }

		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
		{
			auto it = intermediates.emplace(std::piecewise_construct,
			                                std::forward_as_tuple(id),
			                                std::forward_as_tuple(componentCount));
			ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
			return it.first->second;
		}

		Intermediate const &getIntermediate(Object::ID id) const
		{
			auto it = intermediates.find(id);
			ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
			return it->second;
		}

		void createPointer(Object::ID id, SIMD::Pointer ptr)
		{
			bool added = pointers.emplace(id, ptr).second;
			ASSERT_MSG(added, "Pointer %d created twice", id.value());
		}

		SIMD::Pointer const &getPointer(Object::ID id) const
		{
			auto it = pointers.find(id);
			ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
			return it->second;
		}

	private:
		std::unordered_map<Object::ID, Intermediate> intermediates;
		std::unordered_map<Object::ID, SIMD::Pointer> pointers;

		const bool robustBufferAccess;  // Emit robustBufferAccess safe code.
		const unsigned int multiSampleCount;
		const spv::ExecutionModel executionModel;
	};

	// EmitResult is an enumerator of result values from the Emit functions.
	enum class EmitResult
	{
		Continue,    // No termination instructions.
		Terminator,  // Reached a termination instruction.
	};

	// Generic wrapper over either a per-lane intermediate value, or a constant.
	// Constants are transparently widened to per-lane values in operator[].
	// This is appropriate in most cases -- if we're not going to do something
	// significantly different based on whether the value is uniform across lanes.
	class Operand
	{
	public:
		Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId);
		Operand(const Intermediate &value);

		RValue<SIMD::Float> Float(uint32_t i) const
		{
			if(intermediate)
			{
				return intermediate->Float(i);
			}

			// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
			// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
			// Thus we must first construct an integer constant, and bitcast to float.
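			// For example (illustrative), a constant whose word encodes a NaN must keep
			// its exact bit pattern; round-tripping it through a host float value could
			// quiet or canonicalize it, so the bits are reinterpreted instead.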
			return As<SIMD::Float>(SIMD::UInt(constant[i]));
		}

		RValue<SIMD::Int> Int(uint32_t i) const
		{
			if(intermediate)
			{
				return intermediate->Int(i);
			}

			return SIMD::Int(constant[i]);
		}

		RValue<SIMD::UInt> UInt(uint32_t i) const
		{
			if(intermediate)
			{
				return intermediate->UInt(i);
			}

			return SIMD::UInt(constant[i]);
		}

	private:
		RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)

		// Delegate constructor
		Operand(const EmitState *state, const Object &object);

		const uint32_t *constant;
		const Intermediate *intermediate;

	public:
		const uint32_t componentCount;
	};

	RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)

	Type const &getType(Type::ID id) const
	{
		auto it = types.find(id);
		ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
		return it->second;
	}

	Type const &getType(const Object &object) const
	{
		return getType(object.typeId());
	}

	Object const &getObject(Object::ID id) const
	{
		auto it = defs.find(id);
		ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
		return it->second;
	}

	Type const &getObjectType(Object::ID id) const
	{
		return getType(getObject(id));
	}

	Function const &getFunction(Function::ID id) const
	{
		auto it = functions.find(id);
		ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
		return it->second;
	}

	String const &getString(StringID id) const
	{
		auto it = strings.find(id);
		ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value());
		return it->second;
	}

	Extension const &getExtension(Extension::ID id) const
	{
		auto it = extensionsByID.find(id);
		ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value());
		return it->second;
	}

	// Returns a SIMD::Pointer to the underlying data for the given pointer
	// object.
	// Handles objects of the following kinds:
	//  - DescriptorSet
	//  - Pointer
	//  - InterfaceVariable
	// Calling GetPointerToData with objects of any other kind will assert.
	SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const;

	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
	SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;

	// Returns the *component* offset in the literal for the given access chain.
	uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;

	// Lookup the active lane mask for the edge from -> to.
	// If from is unreachable, then a mask of all zeros is returned.
	// Asserts if from is reachable and the edge does not exist.
	RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;

	// Updates the current active lane mask.
	void SetActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// Emit all the unvisited blocks (except for ignore) in DFS order,
	// starting with id.
	void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
	void EmitNonLoop(EmitState *state) const;
	void EmitLoop(EmitState *state) const;

	void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
	EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;

	// Emit pass instructions:
	EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
	EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOpenCLDebugInfo100(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLine(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
	EmitResult EmitKill(InsnIterator insn, EmitState *state) const;
	EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
	EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageSample(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageRead(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
	EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
	EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;

	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const;

	Pointer<Byte> lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;
	void callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;

	void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
	static SIMD::Pointer GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state);
	static void WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat);
	uint32_t GetConstScalarInt(Object::ID id) const;
	void EvalSpecConstantOp(InsnIterator insn);
	void EvalSpecConstantUnaryOp(InsnIterator insn);
	void EvalSpecConstantBinaryOp(InsnIterator insn);

	// Fragment input interpolation functions
	uint32_t GetNumInputComponents(int32_t location) const;
	uint32_t GetPackedInterpolant(int32_t location) const;
	enum InterpolationType
	{
		Centroid,
		AtSample,
		AtOffset,
	};
	SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
	                        uint32_t component, EmitState *state, InterpolationType type) const;

	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
	// can also store independent operands.
	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const;

	// LoadPhi loads the phi values from the alloca storage and places the
	// load values into the intermediate with the phi's result id.
	void LoadPhi(InsnIterator insn, EmitState *state) const;

	// StorePhi updates the phi's alloca storage value using the incoming
	// values from blocks that are both in the OpPhi instruction and in
	// filter.
	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;

	// Emits a rr::Fence for the given MemorySemanticsMask.
	void Fence(spv::MemorySemanticsMask semantics) const;

	// Helper for calling rr::Yield with res cast to an rr::Int.
	void Yield(YieldResult res) const;

	// WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
	// control flow to the given file path.
	void WriteCFGGraphVizDotFile(const char *path) const;

	// OpcodeName() returns the name of the opcode op.
	static const char *OpcodeName(spv::Op op);
	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

	// IsStatement() returns true if the given opcode actually performs
	// work (as opposed to declaring a type, defining a function start / end,
	// etc).
	static bool IsStatement(spv::Op op);

	// HasTypeAndResult() returns true if the given opcode's instruction
	// has a result type ID and result ID, i.e. defines an Object.
	static bool HasTypeAndResult(spv::Op op);

	// Helper as we often need to take dot products as part of doing other things.
	SIMD::Float Dot(unsigned numComponents, Operand const &x, Operand const &y) const;

	// Splits x into a floating-point significand in the range [0.5, 1.0)
	// and an integral exponent of two, such that:
	//   x = significand * 2^exponent
	// Returns the pair <significand, exponent>.
	std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;

	static ImageSampler *getImageSampler(const vk::Device *device, uint32_t signature, uint32_t samplerId, uint32_t imageViewId);
	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
	static std::shared_ptr<rr::Routine> emitWriteRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);

	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
	static sw::FilterType convertFilterMode(const vk::SamplerState *samplerState, VkImageViewType imageViewType, SamplerMethod samplerMethod);
	static sw::MipmapType convertMipmapMode(const vk::SamplerState *samplerState);
	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::SamplerState *samplerState, VkImageViewType imageViewType);

	// Returns 0 when invalid.
	static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);

	// Debugger API functions. When ENABLE_VK_DEBUGGER is not defined, these
	// are all no-ops.

	// dbgInit() initializes the debugger code generation.
	// All other dbgXXX() functions are no-ops until this is called.
	void dbgInit(const std::shared_ptr<vk::dbg::Context> &dbgctx);

	// dbgTerm() terminates the debugger code generation.
	void dbgTerm();

	// dbgCreateFile() generates a synthetic file containing the disassembly
	// of the SPIR-V shader. This is the file displayed in the debug
	// session.
	void dbgCreateFile();

	// dbgBeginEmit() sets up the debugging state for the shader.
	void dbgBeginEmit(EmitState *state) const;

	// dbgEndEmit() tears down the debugging state for the shader.
	void dbgEndEmit(EmitState *state) const;

	// dbgBeginEmitInstruction() updates the current debugger location for
	// the given instruction.
	void dbgBeginEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgEndEmitInstruction() creates any new debugger variables for the
	// instruction that just completed.
	void dbgEndEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgExposeIntermediate() exposes the intermediate with the given ID to
	// the debugger.
	void dbgExposeIntermediate(Object::ID id, EmitState *state) const;

	// dbgUpdateActiveLaneMask() updates the active lane masks to the
	// debugger.
	void dbgUpdateActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// dbgDeclareResult() associates resultId as the result of the given
	// instruction.
	void dbgDeclareResult(const InsnIterator &insn, Object::ID resultId) const;

	// Impl holds forward declaration structs and pointers to state for the
	// private implementations in the corresponding SpirvShaderXXX.cpp files.
	// This allows access to the private members of the SpirvShader, without
	// littering the header with implementation details.
	struct Impl
	{
		struct Debugger;
		struct Group;
		Debugger *debugger = nullptr;
	};
	Impl impl;
};

class SpirvRoutine
{
public:
	SpirvRoutine(vk::PipelineLayout const *pipelineLayout);

	using Variable = Array<SIMD::Float>;

	// Single-entry 'inline' sampler routine cache.
	struct SamplerCache
	{
		Pointer<Byte> imageDescriptor = nullptr;
		Int samplerId;

		Pointer<Byte> function;
	};

	struct InterpolationData
	{
		Pointer<Byte> primitive;
		SIMD::Float x;
		SIMD::Float y;
		SIMD::Float rhw;
		SIMD::Float xCentroid;
		SIMD::Float yCentroid;
		SIMD::Float rhwCentroid;
	};

	vk::PipelineLayout const *const pipelineLayout;

	std::unordered_map<SpirvShader::Object::ID, Variable> variables;
	std::unordered_map<uint32_t, SamplerCache> samplerCache;  // Indexed by the instruction position, in words.
	Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS };
	Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS };
	InterpolationData interpolationData;

	Pointer<Byte> device;
	Pointer<Byte> workgroupMemory;
	Pointer<Pointer<Byte>> descriptorSets;
	Pointer<Int> descriptorDynamicOffsets;
	Pointer<Byte> pushConstants;
	Pointer<Byte> constants;
	Int killMask = Int{ 0 };

	// Shader invocation state.
	// Not all of these variables are used for every type of shader, and some
	// are only used when debugging. See b/146486064 for more information.
	// Give careful consideration to the runtime performance loss before adding
	// more state here.
	std::array<SIMD::Int, 2> windowSpacePosition;
	Int viewID;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
	Int instanceID;
	SIMD::Int vertexIndex;
	std::array<SIMD::Float, 4> fragCoord;
	std::array<SIMD::Float, 4> pointCoord;
	SIMD::Int helperInvocation;
	Int4 numWorkgroups;
	Int4 workgroupID;
	Int4 workgroupSize;
	Int subgroupsPerWorkgroup;
	Int invocationsPerSubgroup;
	Int subgroupIndex;
	SIMD::Int localInvocationIndex;
	std::array<SIMD::Int, 3> localInvocationID;
	std::array<SIMD::Int, 3> globalInvocationID;

	Pointer<Byte> dbgState;  // Pointer to a debugger state.

	void createVariable(SpirvShader::Object::ID id, uint32_t componentCount)
	{
		bool added = variables.emplace(id, Variable(componentCount)).second;
		ASSERT_MSG(added, "Variable %d created twice", id.value());
	}

	Variable &getVariable(SpirvShader::Object::ID id)
	{
		auto it = variables.find(id);
		ASSERT_MSG(it != variables.end(), "Unknown variable %d", id.value());
		return it->second;
	}

	// setImmutableInputBuiltins() sets all the immutable input builtins,
	// common for all shader types.
	void setImmutableInputBuiltins(SpirvShader const *shader);

	static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);

	// setInputBuiltin() calls f() with the builtin and value if the shader
	// uses the input builtin, otherwise the call is a no-op.
	// F is a function with the signature:
	//   void(const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value)
	template<typename F>
	inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f)
	{
		auto it = shader->inputBuiltins.find(id);
		if(it != shader->inputBuiltins.end())
		{
			const auto &builtin = it->second;
			f(builtin, getVariable(builtin.Id));
		}
	}

private:
	// The phis are only accessible to SpirvShader as they are only used and
	// exist between calls to SpirvShader::emitProlog() and
	// SpirvShader::emitEpilog().
	friend class SpirvShader;

	std::unordered_map<SpirvShader::Object::ID, Variable> phis;
};

}  // namespace sw

#endif  // sw_SpirvShader_hpp