1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_SpirvShader_hpp 16 #define sw_SpirvShader_hpp 17 18 #include "SamplerCore.hpp" 19 #include "ShaderCore.hpp" 20 #include "SpirvBinary.hpp" 21 #include "SpirvID.hpp" 22 #include "SpirvProfiler.hpp" 23 #include "Device/Config.hpp" 24 #include "Device/Sampler.hpp" 25 #include "System/Debug.hpp" 26 #include "System/Math.hpp" 27 #include "System/Types.hpp" 28 #include "Vulkan/VkConfig.hpp" 29 #include "Vulkan/VkDescriptorSet.hpp" 30 31 #define SPV_ENABLE_UTILITY_CODE 32 #include <spirv/unified1/spirv.hpp> 33 34 #include <array> 35 #include <atomic> 36 #include <cstdint> 37 #include <cstring> 38 #include <deque> 39 #include <functional> 40 #include <memory> 41 #include <string> 42 #include <type_traits> 43 #include <unordered_map> 44 #include <unordered_set> 45 #include <vector> 46 47 #undef Yield // b/127920555 48 49 namespace vk { 50 51 class Device; 52 class PipelineLayout; 53 class ImageView; 54 class Sampler; 55 class RenderPass; 56 struct SampledImageDescriptor; 57 struct SamplerState; 58 59 namespace dbg { 60 class Context; 61 } // namespace dbg 62 63 } // namespace vk 64 65 namespace sw { 66 67 // Forward declarations. 
68 class SpirvRoutine; 69 70 // Incrementally constructed complex bundle of rvalues 71 // Effectively a restricted vector, supporting only: 72 // - allocation to a (runtime-known) fixed component count 73 // - in-place construction of elements 74 // - const operator[] 75 class Intermediate 76 { 77 public: Intermediate(uint32_t componentCount)78 Intermediate(uint32_t componentCount) 79 : componentCount(componentCount) 80 , scalar(new rr::Value *[componentCount]) 81 { 82 for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; } 83 } 84 ~Intermediate()85 ~Intermediate() 86 { 87 delete[] scalar; 88 } 89 90 // TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to 91 // decide the format used to print the intermediate data. 92 enum class TypeHint 93 { 94 Float, 95 Int, 96 UInt 97 }; 98 move(uint32_t i,RValue<SIMD::Float> && scalar)99 void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); } move(uint32_t i,RValue<SIMD::Int> && scalar)100 void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); } move(uint32_t i,RValue<SIMD::UInt> && scalar)101 void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); } 102 move(uint32_t i,const RValue<SIMD::Float> & scalar)103 void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); } move(uint32_t i,const RValue<SIMD::Int> & scalar)104 void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); } move(uint32_t i,const RValue<SIMD::UInt> & scalar)105 void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); } 106 107 // Value retrieval functions. 
Float(uint32_t i) const108 RValue<SIMD::Float> Float(uint32_t i) const 109 { 110 ASSERT(i < componentCount); 111 ASSERT(scalar[i] != nullptr); 112 return As<SIMD::Float>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Float>(scalar) 113 } 114 Int(uint32_t i) const115 RValue<SIMD::Int> Int(uint32_t i) const 116 { 117 ASSERT(i < componentCount); 118 ASSERT(scalar[i] != nullptr); 119 return As<SIMD::Int>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Int>(scalar) 120 } 121 UInt(uint32_t i) const122 RValue<SIMD::UInt> UInt(uint32_t i) const 123 { 124 ASSERT(i < componentCount); 125 ASSERT(scalar[i] != nullptr); 126 return As<SIMD::UInt>(scalar[i]); // TODO(b/128539387): RValue<SIMD::UInt>(scalar) 127 } 128 129 // No copy/move construction or assignment 130 Intermediate(Intermediate const &) = delete; 131 Intermediate(Intermediate &&) = delete; 132 Intermediate &operator=(Intermediate const &) = delete; 133 Intermediate &operator=(Intermediate &&) = delete; 134 135 const uint32_t componentCount; 136 137 private: emplace(uint32_t i,rr::Value * value,TypeHint type)138 void emplace(uint32_t i, rr::Value *value, TypeHint type) 139 { 140 ASSERT(i < componentCount); 141 ASSERT(scalar[i] == nullptr); 142 scalar[i] = value; 143 RR_PRINT_ONLY(typeHint = type;) 144 } 145 146 rr::Value **const scalar; 147 148 #ifdef ENABLE_RR_PRINT 149 friend struct rr::PrintValue::Ty<sw::Intermediate>; 150 TypeHint typeHint = TypeHint::Float; 151 #endif // ENABLE_RR_PRINT 152 }; 153 154 class SpirvShader 155 { 156 public: 157 SpirvBinary insns; 158 159 using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants); 160 161 enum class YieldResult 162 { 163 ControlBarrier, 164 }; 165 166 class Type; 167 class Object; 168 169 // Pseudo-iterator over SPIR-V instructions, designed to support range-based-for. 
170 class InsnIterator 171 { 172 public: 173 InsnIterator() = default; 174 InsnIterator(InsnIterator const &other) = default; 175 InsnIterator &operator=(const InsnIterator &other) = default; 176 InsnIterator(SpirvBinary::const_iterator iter)177 explicit InsnIterator(SpirvBinary::const_iterator iter) 178 : iter{ iter } 179 { 180 } 181 opcode() const182 spv::Op opcode() const 183 { 184 return static_cast<spv::Op>(*iter & spv::OpCodeMask); 185 } 186 wordCount() const187 uint32_t wordCount() const 188 { 189 return *iter >> spv::WordCountShift; 190 } 191 word(uint32_t n) const192 uint32_t word(uint32_t n) const 193 { 194 ASSERT(n < wordCount()); 195 return iter[n]; 196 } 197 data() const198 const uint32_t *data() const 199 { 200 return &iter[0]; 201 } 202 string(uint32_t n) const203 const char *string(uint32_t n) const 204 { 205 return reinterpret_cast<const char *>(&iter[n]); 206 } 207 208 // Returns the number of whole-words that a string literal starting at 209 // word n consumes. If the end of the intruction is reached before the 210 // null-terminator is found, then the function DABORT()s and 0 is 211 // returned. stringSizeInWords(uint32_t n) const212 uint32_t stringSizeInWords(uint32_t n) const 213 { 214 uint32_t c = wordCount(); 215 for(uint32_t i = n; n < c; i++) 216 { 217 const char *s = string(i); 218 // SPIR-V spec 2.2.1. Instructions: 219 // A string is interpreted as a nul-terminated stream of 220 // characters. The character set is Unicode in the UTF-8 221 // encoding scheme. The UTF-8 octets (8-bit bytes) are packed 222 // four per word, following the little-endian convention (i.e., 223 // the first octet is in the lowest-order 8 bits of the word). 224 // The final word contains the string's nul-termination 225 // character (0), and all contents past the end of the string in 226 // the final word are padded with 0. 
227 if(s[3] == 0) 228 { 229 return 1 + i - n; 230 } 231 } 232 DABORT("SPIR-V string literal was not null-terminated"); 233 return 0; 234 } 235 hasResultAndType() const236 bool hasResultAndType() const 237 { 238 bool hasResult = false, hasResultType = false; 239 spv::HasResultAndType(opcode(), &hasResult, &hasResultType); 240 241 return hasResultType; 242 } 243 resultTypeId() const244 SpirvID<Type> resultTypeId() const 245 { 246 ASSERT(hasResultAndType()); 247 return word(1); 248 } 249 resultId() const250 SpirvID<Object> resultId() const 251 { 252 ASSERT(hasResultAndType()); 253 return word(2); 254 } 255 distanceFrom(const InsnIterator & other) const256 uint32_t distanceFrom(const InsnIterator &other) const 257 { 258 return static_cast<uint32_t>(iter - other.iter); 259 } 260 operator ==(InsnIterator const & other) const261 bool operator==(InsnIterator const &other) const 262 { 263 return iter == other.iter; 264 } 265 operator !=(InsnIterator const & other) const266 bool operator!=(InsnIterator const &other) const 267 { 268 return iter != other.iter; 269 } 270 operator *() const271 InsnIterator operator*() const 272 { 273 return *this; 274 } 275 operator ++()276 InsnIterator &operator++() 277 { 278 iter += wordCount(); 279 return *this; 280 } 281 operator ++(int)282 InsnIterator const operator++(int) 283 { 284 InsnIterator ret{ *this }; 285 iter += wordCount(); 286 return ret; 287 } 288 289 private: 290 SpirvBinary::const_iterator iter; 291 }; 292 293 // Range-based-for interface begin() const294 InsnIterator begin() const 295 { 296 // Skip over the header words 297 return InsnIterator{ insns.cbegin() + 5 }; 298 } 299 end() const300 InsnIterator end() const 301 { 302 return InsnIterator{ insns.cend() }; 303 } 304 305 // A range of contiguous instruction words. 
306 struct Span 307 { Spansw::SpirvShader::Span308 Span(const InsnIterator &insn, uint32_t offset, uint32_t size) 309 : insn(insn) 310 , offset(offset) 311 , wordCount(size) 312 {} 313 operator []sw::SpirvShader::Span314 uint32_t operator[](uint32_t index) const 315 { 316 ASSERT(index < wordCount); 317 return insn.word(offset + index); 318 } 319 sizesw::SpirvShader::Span320 uint32_t size() const 321 { 322 return wordCount; 323 } 324 325 private: 326 const InsnIterator &insn; 327 const uint32_t offset; 328 const uint32_t wordCount; 329 }; 330 331 class Type 332 { 333 public: 334 using ID = SpirvID<Type>; 335 opcode() const336 spv::Op opcode() const { return definition.opcode(); } 337 338 InsnIterator definition; 339 spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1); 340 uint32_t componentCount = 0; 341 bool isBuiltInBlock = false; 342 343 // Inner element type for pointers, arrays, vectors and matrices. 344 ID element; 345 }; 346 347 class Object 348 { 349 public: 350 using ID = SpirvID<Object>; 351 opcode() const352 spv::Op opcode() const { return definition.opcode(); } typeId() const353 Type::ID typeId() const { return definition.resultTypeId(); } id() const354 Object::ID id() const { return definition.resultId(); } 355 356 bool isConstantZero() const; 357 358 InsnIterator definition; 359 std::vector<uint32_t> constantValue; 360 361 enum class Kind 362 { 363 // Invalid default kind. 364 // If we get left with an object in this state, the module was 365 // broken. 366 Unknown, 367 368 // TODO: Better document this kind. 369 // A shader interface variable pointer. 370 // Pointer with uniform address across all lanes. 371 // Pointer held by SpirvRoutine::pointers 372 InterfaceVariable, 373 374 // Constant value held by Object::constantValue. 375 Constant, 376 377 // Value held by SpirvRoutine::intermediates. 378 Intermediate, 379 380 // Pointer held by SpirvRoutine::pointers 381 Pointer, 382 383 // A pointer to a vk::DescriptorSet*. 
384 // Pointer held by SpirvRoutine::pointers. 385 DescriptorSet, 386 }; 387 388 Kind kind = Kind::Unknown; 389 }; 390 391 // Block is an interval of SPIR-V instructions, starting with the 392 // opening OpLabel, and ending with a termination instruction. 393 class Block 394 { 395 public: 396 using ID = SpirvID<Block>; 397 using Set = std::unordered_set<ID>; 398 399 // Edge represents the graph edge between two blocks. 400 struct Edge 401 { 402 ID from; 403 ID to; 404 operator ==sw::SpirvShader::Block::Edge405 bool operator==(const Edge &other) const { return from == other.from && to == other.to; } 406 407 struct Hash 408 { operator ()sw::SpirvShader::Block::Edge::Hash409 std::size_t operator()(const Edge &edge) const noexcept 410 { 411 return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value()); 412 } 413 }; 414 }; 415 416 Block() = default; 417 Block(const Block &other) = default; 418 Block &operator=(const Block &other) = default; 419 explicit Block(InsnIterator begin, InsnIterator end); 420 421 /* range-based-for interface */ begin() const422 inline InsnIterator begin() const { return begin_; } end() const423 inline InsnIterator end() const { return end_; } 424 425 enum Kind 426 { 427 Simple, // OpBranch or other simple terminator. 428 StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional 429 UnstructuredBranchConditional, // OpBranchConditional 430 StructuredSwitch, // OpSelectionMerge + OpSwitch 431 UnstructuredSwitch, // OpSwitch 432 Loop, // OpLoopMerge + [OpBranchConditional | OpBranch] 433 }; 434 435 Kind kind = Simple; 436 InsnIterator mergeInstruction; // Structured control flow merge instruction. 437 InsnIterator branchInstruction; // Branch instruction. 438 ID mergeBlock; // Structured flow merge block. 439 ID continueTarget; // Loop continue block. 440 Set ins; // Blocks that branch into this block. 441 Set outs; // Blocks that this block branches to. 
442 bool isLoopMerge = false; 443 444 private: 445 InsnIterator begin_; 446 InsnIterator end_; 447 }; 448 449 class Function 450 { 451 public: 452 using ID = SpirvID<Function>; 453 454 // Walks all reachable the blocks starting from id adding them to 455 // reachable. 456 void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const; 457 458 // AssignBlockFields() performs the following for all reachable blocks: 459 // * Assigns Block::ins with the identifiers of all blocks that contain 460 // this block in their Block::outs. 461 // * Sets Block::isLoopMerge to true if the block is the merge of a 462 // another loop block. 463 void AssignBlockFields(); 464 465 // ForeachBlockDependency calls f with each dependency of the given 466 // block. A dependency is an incoming block that is not a loop-back 467 // edge. 468 void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const; 469 470 // ExistsPath returns true if there's a direct or indirect flow from 471 // the 'from' block to the 'to' block that does not pass through 472 // notPassingThrough. 473 bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const; 474 getBlock(Block::ID id) const475 Block const &getBlock(Block::ID id) const 476 { 477 auto it = blocks.find(id); 478 ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value()); 479 return it->second; 480 } 481 482 Block::ID entry; // function entry point block. 483 HandleMap<Block> blocks; // blocks belonging to this function. 484 Type::ID type; // type of the function. 485 Type::ID result; // return type. 
486 }; 487 488 using String = std::string; 489 using StringID = SpirvID<std::string>; 490 491 class Extension 492 { 493 public: 494 using ID = SpirvID<Extension>; 495 496 enum Name 497 { 498 Unknown, 499 GLSLstd450, 500 OpenCLDebugInfo100, 501 NonSemanticInfo, 502 }; 503 504 Name name; 505 }; 506 507 struct TypeOrObject 508 {}; 509 510 // TypeOrObjectID is an identifier that represents a Type or an Object, 511 // and supports implicit casting to and from Type::ID or Object::ID. 512 class TypeOrObjectID : public SpirvID<TypeOrObject> 513 { 514 public: 515 using Hash = std::hash<SpirvID<TypeOrObject>>; 516 TypeOrObjectID(uint32_t id)517 inline TypeOrObjectID(uint32_t id) 518 : SpirvID(id) 519 {} TypeOrObjectID(Type::ID id)520 inline TypeOrObjectID(Type::ID id) 521 : SpirvID(id.value()) 522 {} TypeOrObjectID(Object::ID id)523 inline TypeOrObjectID(Object::ID id) 524 : SpirvID(id.value()) 525 {} operator Type::ID() const526 inline operator Type::ID() const { return Type::ID(value()); } operator Object::ID() const527 inline operator Object::ID() const { return Object::ID(value()); } 528 }; 529 530 // OpImageSample variants 531 enum Variant : uint32_t 532 { 533 None, // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod. 534 Dref, 535 Proj, 536 ProjDref, 537 VARIANT_LAST = ProjDref 538 }; 539 540 // Compact representation of image instruction state that is passed to the 541 // trampoline function for retrieving/generating the corresponding sampling routine. 
542 struct ImageInstructionSignature 543 { ImageInstructionSignaturesw::SpirvShader::ImageInstructionSignature544 ImageInstructionSignature(Variant variant, SamplerMethod samplerMethod) 545 { 546 this->variant = variant; 547 this->samplerMethod = samplerMethod; 548 } 549 550 // Unmarshal from raw 32-bit data ImageInstructionSignaturesw::SpirvShader::ImageInstructionSignature551 explicit ImageInstructionSignature(uint32_t signature) 552 : signature(signature) 553 {} 554 getSamplerFunctionsw::SpirvShader::ImageInstructionSignature555 SamplerFunction getSamplerFunction() const 556 { 557 return { samplerMethod, offset != 0, sample != 0 }; 558 } 559 isDrefsw::SpirvShader::ImageInstructionSignature560 bool isDref() const 561 { 562 return (variant == Dref) || (variant == ProjDref); 563 } 564 isProjsw::SpirvShader::ImageInstructionSignature565 bool isProj() const 566 { 567 return (variant == Proj) || (variant == ProjDref); 568 } 569 hasLodsw::SpirvShader::ImageInstructionSignature570 bool hasLod() const 571 { 572 return samplerMethod == Lod || samplerMethod == Fetch; // We always pass a Lod operand for Fetch operations. 
573 } 574 hasGradsw::SpirvShader::ImageInstructionSignature575 bool hasGrad() const 576 { 577 return samplerMethod == Grad; 578 } 579 580 union 581 { 582 struct 583 { 584 Variant variant : BITS(VARIANT_LAST); 585 SamplerMethod samplerMethod : BITS(SAMPLER_METHOD_LAST); 586 uint32_t gatherComponent : 2; 587 uint32_t dim : BITS(spv::DimSubpassData); // spv::Dim 588 uint32_t arrayed : 1; 589 uint32_t imageFormat : BITS(spv::ImageFormatR64i); // spv::ImageFormat 590 591 // Parameters are passed to the sampling routine in this order: 592 uint32_t coordinates : 3; // 1-4 (does not contain projection component) 593 /* uint32_t dref : 1; */ // Indicated by Variant::ProjDref|Dref 594 /* uint32_t lodOrBias : 1; */ // Indicated by SamplerMethod::Lod|Bias|Fetch 595 uint32_t grad : 2; // 0-3 components (for each of dx / dy) 596 uint32_t offset : 2; // 0-3 components 597 uint32_t sample : 1; // 0-1 scalar integer 598 }; 599 600 uint32_t signature = 0; 601 }; 602 }; 603 604 // This gets stored as a literal in the generated code, so it should be compact. 
605 static_assert(sizeof(ImageInstructionSignature) == sizeof(uint32_t), "ImageInstructionSignature must be 32-bit"); 606 607 struct ImageInstruction : public ImageInstructionSignature 608 { 609 ImageInstruction(InsnIterator insn, const SpirvShader &spirv); 610 611 const uint32_t position; 612 613 Type::ID resultTypeId = 0; 614 Object::ID resultId = 0; 615 Object::ID imageId = 0; 616 Object::ID samplerId = 0; 617 Object::ID coordinateId = 0; 618 Object::ID texelId = 0; 619 Object::ID drefId = 0; 620 Object::ID lodOrBiasId = 0; 621 Object::ID gradDxId = 0; 622 Object::ID gradDyId = 0; 623 Object::ID offsetId = 0; 624 Object::ID sampleId = 0; 625 626 private: 627 static ImageInstructionSignature parseVariantAndMethod(InsnIterator insn); 628 static uint32_t getImageOperandsIndex(InsnIterator insn); 629 static uint32_t getImageOperandsMask(InsnIterator insn); 630 }; 631 632 // This method is for retrieving an ID that uniquely identifies the 633 // shader entry point represented by this object. 
getIdentifier() const634 uint64_t getIdentifier() const 635 { 636 return ((uint64_t)entryPoint.value() << 32) | insns.getIdentifier(); 637 } 638 639 SpirvShader(VkShaderStageFlagBits stage, 640 const char *entryPointName, 641 SpirvBinary const &insns, 642 const vk::RenderPass *renderPass, 643 uint32_t subpassIndex, 644 bool robustBufferAccess, 645 const std::shared_ptr<vk::dbg::Context> &dbgctx, 646 std::shared_ptr<SpirvProfiler> profiler); 647 648 ~SpirvShader(); 649 650 struct ExecutionModes 651 { 652 bool EarlyFragmentTests : 1; 653 bool DepthReplacing : 1; 654 bool DepthGreater : 1; 655 bool DepthLess : 1; 656 bool DepthUnchanged : 1; 657 658 // Compute workgroup dimensions 659 Object::ID WorkgroupSizeX = 1; 660 Object::ID WorkgroupSizeY = 1; 661 Object::ID WorkgroupSizeZ = 1; 662 bool useWorkgroupSizeId = false; 663 }; 664 getExecutionModes() const665 const ExecutionModes &getExecutionModes() const 666 { 667 return executionModes; 668 } 669 670 struct Analysis 671 { 672 bool ContainsDiscard : 1; // OpKill, OpTerminateInvocation, or OpDemoteToHelperInvocation 673 bool ContainsControlBarriers : 1; 674 bool NeedsCentroid : 1; 675 bool ContainsSampleQualifier : 1; 676 }; 677 getAnalysis() const678 const Analysis &getAnalysis() const 679 { 680 return analysis; 681 } 682 683 struct Capabilities 684 { 685 bool Matrix : 1; 686 bool Shader : 1; 687 bool StorageImageMultisample : 1; 688 bool ClipDistance : 1; 689 bool CullDistance : 1; 690 bool ImageCubeArray : 1; 691 bool SampleRateShading : 1; 692 bool InputAttachment : 1; 693 bool Sampled1D : 1; 694 bool Image1D : 1; 695 bool SampledBuffer : 1; 696 bool SampledCubeArray : 1; 697 bool ImageBuffer : 1; 698 bool ImageMSArray : 1; 699 bool StorageImageExtendedFormats : 1; 700 bool ImageQuery : 1; 701 bool DerivativeControl : 1; 702 bool DotProductInputAll : 1; 703 bool DotProductInput4x8Bit : 1; 704 bool DotProductInput4x8BitPacked : 1; 705 bool DotProduct : 1; 706 bool InterpolationFunction : 1; 707 bool 
StorageImageWriteWithoutFormat : 1; 708 bool GroupNonUniform : 1; 709 bool GroupNonUniformVote : 1; 710 bool GroupNonUniformBallot : 1; 711 bool GroupNonUniformShuffle : 1; 712 bool GroupNonUniformShuffleRelative : 1; 713 bool GroupNonUniformArithmetic : 1; 714 bool DeviceGroup : 1; 715 bool MultiView : 1; 716 bool DemoteToHelperInvocation : 1; 717 bool StencilExportEXT : 1; 718 bool VulkanMemoryModel : 1; 719 bool VulkanMemoryModelDeviceScope : 1; 720 }; 721 getUsedCapabilities() const722 const Capabilities &getUsedCapabilities() const 723 { 724 return capabilities; 725 } 726 727 // getNumOutputClipDistances() returns the number of ClipDistances 728 // outputted by this shader. getNumOutputClipDistances() const729 unsigned int getNumOutputClipDistances() const 730 { 731 if(getUsedCapabilities().ClipDistance) 732 { 733 auto it = outputBuiltins.find(spv::BuiltInClipDistance); 734 if(it != outputBuiltins.end()) 735 { 736 return it->second.SizeInComponents; 737 } 738 } 739 return 0; 740 } 741 742 // getNumOutputCullDistances() returns the number of CullDistances 743 // outputted by this shader. 
getNumOutputCullDistances() const744 unsigned int getNumOutputCullDistances() const 745 { 746 if(getUsedCapabilities().CullDistance) 747 { 748 auto it = outputBuiltins.find(spv::BuiltInCullDistance); 749 if(it != outputBuiltins.end()) 750 { 751 return it->second.SizeInComponents; 752 } 753 } 754 return 0; 755 } 756 757 enum AttribType : unsigned char 758 { 759 ATTRIBTYPE_FLOAT, 760 ATTRIBTYPE_INT, 761 ATTRIBTYPE_UINT, 762 ATTRIBTYPE_UNUSED, 763 764 ATTRIBTYPE_LAST = ATTRIBTYPE_UINT 765 }; 766 hasBuiltinInput(spv::BuiltIn b) const767 bool hasBuiltinInput(spv::BuiltIn b) const 768 { 769 return inputBuiltins.find(b) != inputBuiltins.end(); 770 } 771 hasBuiltinOutput(spv::BuiltIn b) const772 bool hasBuiltinOutput(spv::BuiltIn b) const 773 { 774 return outputBuiltins.find(b) != outputBuiltins.end(); 775 } 776 777 struct Decorations 778 { 779 int32_t Location = -1; 780 int32_t Component = 0; 781 spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1); 782 int32_t Offset = -1; 783 int32_t ArrayStride = -1; 784 int32_t MatrixStride = 1; 785 786 bool HasLocation : 1; 787 bool HasComponent : 1; 788 bool HasBuiltIn : 1; 789 bool HasOffset : 1; 790 bool HasArrayStride : 1; 791 bool HasMatrixStride : 1; 792 bool HasRowMajor : 1; // whether RowMajor bit is valid. 793 794 bool Flat : 1; 795 bool Centroid : 1; 796 bool NoPerspective : 1; 797 bool Block : 1; 798 bool BufferBlock : 1; 799 bool RelaxedPrecision : 1; 800 bool RowMajor : 1; // RowMajor if true; ColMajor if false 801 bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. 
802 Decorationssw::SpirvShader::Decorations803 Decorations() 804 : Location{ -1 } 805 , Component{ 0 } 806 , BuiltIn{ static_cast<spv::BuiltIn>(-1) } 807 , Offset{ -1 } 808 , ArrayStride{ -1 } 809 , MatrixStride{ -1 } 810 , HasLocation{ false } 811 , HasComponent{ false } 812 , HasBuiltIn{ false } 813 , HasOffset{ false } 814 , HasArrayStride{ false } 815 , HasMatrixStride{ false } 816 , HasRowMajor{ false } 817 , Flat{ false } 818 , Centroid{ false } 819 , NoPerspective{ false } 820 , Block{ false } 821 , BufferBlock{ false } 822 , RelaxedPrecision{ false } 823 , RowMajor{ false } 824 , InsideMatrix{ false } 825 { 826 } 827 828 Decorations(Decorations const &) = default; 829 830 void Apply(Decorations const &src); 831 832 void Apply(spv::Decoration decoration, uint32_t arg); 833 }; 834 835 std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations; 836 std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations; 837 838 struct DescriptorDecorations 839 { 840 int32_t DescriptorSet = -1; 841 int32_t Binding = -1; 842 int32_t InputAttachmentIndex = -1; 843 844 void Apply(DescriptorDecorations const &src); 845 }; 846 847 std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations; 848 std::vector<vk::Format> inputAttachmentFormats; 849 850 struct InterfaceComponent 851 { 852 AttribType Type; 853 854 union 855 { 856 struct 857 { 858 bool Flat : 1; 859 bool Centroid : 1; 860 bool NoPerspective : 1; 861 }; 862 863 uint8_t DecorationBits; 864 }; 865 InterfaceComponentsw::SpirvShader::InterfaceComponent866 InterfaceComponent() 867 : Type{ ATTRIBTYPE_UNUSED } 868 , DecorationBits{ 0 } 869 { 870 } 871 }; 872 873 struct BuiltinMapping 874 { 875 Object::ID Id; 876 uint32_t FirstComponent; 877 uint32_t SizeInComponents; 878 }; 879 880 struct WorkgroupMemory 881 { 882 // allocates a new variable of size bytes with the given identifier. 
allocatesw::SpirvShader::WorkgroupMemory883 inline void allocate(Object::ID id, uint32_t size) 884 { 885 uint32_t offset = totalSize; 886 auto it = offsets.emplace(id, offset); 887 ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value())); 888 totalSize += size; 889 } 890 // returns the byte offset of the variable with the given identifier. offsetOfsw::SpirvShader::WorkgroupMemory891 inline uint32_t offsetOf(Object::ID id) const 892 { 893 auto it = offsets.find(id); 894 ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value())); 895 return it->second; 896 } 897 // returns the total allocated size in bytes. sizesw::SpirvShader::WorkgroupMemory898 inline uint32_t size() const { return totalSize; } 899 900 private: 901 uint32_t totalSize = 0; // in bytes 902 std::unordered_map<Object::ID, uint32_t> offsets; // in bytes 903 }; 904 905 std::vector<InterfaceComponent> inputs; 906 std::vector<InterfaceComponent> outputs; 907 908 void emitProlog(SpirvRoutine *routine) const; 909 void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const; 910 void emitEpilog(SpirvRoutine *routine) const; 911 void clearPhis(SpirvRoutine *routine) const; 912 913 uint32_t getWorkgroupSizeX() const; 914 uint32_t getWorkgroupSizeY() const; 915 uint32_t getWorkgroupSizeZ() const; 916 containsImageWrite() const917 bool containsImageWrite() const { return imageWriteEmitted; } 918 919 using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>; 920 std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins; 921 std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins; 922 WorkgroupMemory workgroupMemory; 923 924 private: 925 const bool robustBufferAccess; 926 927 Function::ID entryPoint; 928 spv::ExecutionModel 
executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing. 929 ExecutionModes executionModes = {}; 930 Capabilities capabilities = {}; 931 spv::AddressingModel addressingModel = spv::AddressingModelLogical; 932 spv::MemoryModel memoryModel = spv::MemoryModelSimple; 933 HandleMap<Extension> extensionsByID; 934 std::unordered_set<uint32_t> extensionsImported; 935 936 Analysis analysis = {}; 937 mutable bool imageWriteEmitted = false; 938 939 HandleMap<Type> types; 940 HandleMap<Object> defs; 941 HandleMap<Function> functions; 942 std::unordered_map<StringID, String> strings; 943 944 std::shared_ptr<SpirvProfiler> profiler; 945 IsProfilingEnabled() const946 bool IsProfilingEnabled() const 947 { 948 return profiler != nullptr; 949 } 950 951 // DeclareType creates a Type for the given OpTypeX instruction, storing 952 // it into the types map. It is called from the analysis pass (constructor). 953 void DeclareType(InsnIterator insn); 954 955 void ProcessExecutionMode(InsnIterator it); 956 957 uint32_t ComputeTypeSize(InsnIterator insn); 958 Decorations GetDecorationsForId(TypeOrObjectID id) const; 959 void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const; 960 void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const; 961 void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, const Span &indexIds) const; 962 963 // Creates an Object for the instruction's result in 'defs'. 964 void DefineResult(const InsnIterator &insn); 965 966 // Processes the OpenCL.Debug.100 instruction for the initial definition 967 // pass of the SPIR-V. 968 void DefineOpenCLDebugInfo100(const InsnIterator &insn); 969 970 // Returns true if data in the given storage class is word-interleaved 971 // by each SIMD vector lane, otherwise data is stored linerally. 972 // 973 // Each lane addresses a single word, picked by a base pointer and an 974 // integer offset. 
975 // 976 // A word is currently 32 bits (single float, int32_t, uint32_t). 977 // A lane is a single element of a SIMD vector register. 978 // 979 // Storage interleaved by lane - (IsStorageInterleavedByLane() == true): 980 // --------------------------------------------------------------------- 981 // 982 // Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex) 983 // 984 // Assuming SIMD::Width == 4: 985 // 986 // Lane[0] | Lane[1] | Lane[2] | Lane[3] 987 // ===========+===========+===========+========== 988 // LaneOffset=0: | Word[0] | Word[1] | Word[2] | Word[3] 989 // ---------------+-----------+-----------+-----------+---------- 990 // LaneOffset=1: | Word[4] | Word[5] | Word[6] | Word[7] 991 // ---------------+-----------+-----------+-----------+---------- 992 // LaneOffset=2: | Word[8] | Word[9] | Word[a] | Word[b] 993 // ---------------+-----------+-----------+-----------+---------- 994 // LaneOffset=3: | Word[c] | Word[d] | Word[e] | Word[f] 995 // 996 // 997 // Linear storage - (IsStorageInterleavedByLane() == false): 998 // --------------------------------------------------------- 999 // 1000 // Address = PtrBase + sizeof(Word) * LaneOffset 1001 // 1002 // Lane[0] | Lane[1] | Lane[2] | Lane[3] 1003 // ===========+===========+===========+========== 1004 // LaneOffset=0: | Word[0] | Word[0] | Word[0] | Word[0] 1005 // ---------------+-----------+-----------+-----------+---------- 1006 // LaneOffset=1: | Word[1] | Word[1] | Word[1] | Word[1] 1007 // ---------------+-----------+-----------+-----------+---------- 1008 // LaneOffset=2: | Word[2] | Word[2] | Word[2] | Word[2] 1009 // ---------------+-----------+-----------+-----------+---------- 1010 // LaneOffset=3: | Word[3] | Word[3] | Word[3] | Word[3] 1011 // 1012 static bool IsStorageInterleavedByLane(spv::StorageClass storageClass); 1013 static bool IsExplicitLayout(spv::StorageClass storageClass); 1014 1015 static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p); 1016 1017 
// Output storage buffers and images should not be affected by helper invocations 1018 static bool StoresInHelperInvocation(spv::StorageClass storageClass); 1019 1020 using InterfaceVisitor = std::function<void(Decorations const, AttribType)>; 1021 1022 void VisitInterface(Object::ID id, const InterfaceVisitor &v) const; 1023 1024 int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const; 1025 1026 // MemoryElement describes a scalar element within a structure, and is 1027 // used by the callback function of VisitMemoryObject(). 1028 struct MemoryElement 1029 { 1030 uint32_t index; // index of the scalar element 1031 uint32_t offset; // offset (in bytes) from the base of the object 1032 const Type &type; // element type 1033 }; 1034 1035 using MemoryVisitor = std::function<void(const MemoryElement &)>; 1036 1037 // VisitMemoryObject() walks a type tree in an explicitly laid out 1038 // storage class, calling the MemoryVisitor for each scalar element 1039 // within the 1040 void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const; 1041 1042 // VisitMemoryObjectInner() is internally called by VisitMemoryObject() 1043 void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const; 1044 1045 Object &CreateConstant(InsnIterator it); 1046 1047 void ProcessInterfaceVariable(Object &object); 1048 1049 // EmitState holds control-flow state for the emit() pass. 
1050 class EmitState 1051 { 1052 public: EmitState(SpirvRoutine * routine,Function::ID function,RValue<SIMD::Int> activeLaneMask,RValue<SIMD::Int> storesAndAtomicsMask,const vk::DescriptorSet::Bindings & descriptorSets,unsigned int multiSampleCount)1053 EmitState(SpirvRoutine *routine, 1054 Function::ID function, 1055 RValue<SIMD::Int> activeLaneMask, 1056 RValue<SIMD::Int> storesAndAtomicsMask, 1057 const vk::DescriptorSet::Bindings &descriptorSets, 1058 unsigned int multiSampleCount) 1059 : routine(routine) 1060 , function(function) 1061 , activeLaneMaskValue(activeLaneMask.value()) 1062 , storesAndAtomicsMaskValue(storesAndAtomicsMask.value()) 1063 , descriptorSets(descriptorSets) 1064 , multiSampleCount(multiSampleCount) 1065 { 1066 } 1067 1068 // Returns the mask describing the active lanes as updated by dynamic 1069 // control flow. Active lanes include helper invocations, used for 1070 // calculating fragment derivitives, which must not perform memory 1071 // stores or atomic writes. 1072 // 1073 // Use activeStoresAndAtomicsMask() to consider both control flow and 1074 // lanes which are permitted to perform memory stores and atomic 1075 // operations activeLaneMask() const1076 RValue<SIMD::Int> activeLaneMask() const 1077 { 1078 ASSERT(activeLaneMaskValue != nullptr); 1079 return RValue<SIMD::Int>(activeLaneMaskValue); 1080 } 1081 1082 // Returns the immutable lane mask that describes which lanes are 1083 // permitted to perform memory stores and atomic operations. 1084 // Note that unlike activeStoresAndAtomicsMask() this mask *does not* 1085 // consider lanes that have been made inactive due to control flow. 
storesAndAtomicsMask() const1086 RValue<SIMD::Int> storesAndAtomicsMask() const 1087 { 1088 ASSERT(storesAndAtomicsMaskValue != nullptr); 1089 return RValue<SIMD::Int>(storesAndAtomicsMaskValue); 1090 } 1091 1092 // Returns a lane mask that describes which lanes are permitted to 1093 // perform memory stores and atomic operations, considering lanes that 1094 // may have been made inactive due to control flow. activeStoresAndAtomicsMask() const1095 RValue<SIMD::Int> activeStoresAndAtomicsMask() const 1096 { 1097 return activeLaneMask() & storesAndAtomicsMask(); 1098 } 1099 1100 // Add a new active lane mask edge from the current block to out. 1101 // The edge mask value will be (mask AND activeLaneMaskValue). 1102 // If multiple active lane masks are added for the same edge, then 1103 // they will be ORed together. 1104 void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask); 1105 1106 // Add a new active lane mask for the edge from -> to. 1107 // If multiple active lane masks are added for the same edge, then 1108 // they will be ORed together. 1109 void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask); 1110 1111 SpirvRoutine *routine = nullptr; // The current routine being built. 1112 Function::ID function; // The current function being built. 1113 Block::ID block; // The current block being built. 1114 rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask. 1115 rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask. 1116 Block::Set visited; // Blocks already built. 
1117 std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks; 1118 std::deque<Block::ID> *pending; 1119 1120 const vk::DescriptorSet::Bindings &descriptorSets; 1121 getMultiSampleCount() const1122 unsigned int getMultiSampleCount() const { return multiSampleCount; } 1123 createIntermediate(Object::ID id,uint32_t componentCount)1124 Intermediate &createIntermediate(Object::ID id, uint32_t componentCount) 1125 { 1126 auto it = intermediates.emplace(std::piecewise_construct, 1127 std::forward_as_tuple(id), 1128 std::forward_as_tuple(componentCount)); 1129 ASSERT_MSG(it.second, "Intermediate %d created twice", id.value()); 1130 return it.first->second; 1131 } 1132 getIntermediate(Object::ID id) const1133 Intermediate const &getIntermediate(Object::ID id) const 1134 { 1135 auto it = intermediates.find(id); 1136 ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value()); 1137 return it->second; 1138 } 1139 createPointer(Object::ID id,SIMD::Pointer ptr)1140 void createPointer(Object::ID id, SIMD::Pointer ptr) 1141 { 1142 bool added = pointers.emplace(id, ptr).second; 1143 ASSERT_MSG(added, "Pointer %d created twice", id.value()); 1144 } 1145 getPointer(Object::ID id) const1146 SIMD::Pointer const &getPointer(Object::ID id) const 1147 { 1148 auto it = pointers.find(id); 1149 ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value()); 1150 return it->second; 1151 } 1152 1153 private: 1154 std::unordered_map<Object::ID, Intermediate> intermediates; 1155 std::unordered_map<Object::ID, SIMD::Pointer> pointers; 1156 1157 const unsigned int multiSampleCount; 1158 }; 1159 1160 // EmitResult is an enumerator of result values from the Emit functions. 1161 enum class EmitResult 1162 { 1163 Continue, // No termination instructions. 1164 Terminator, // Reached a termination instruction. 1165 }; 1166 1167 // Generic wrapper over either per-lane intermediate value, or a constant. 
1168 // Constants are transparently widened to per-lane values in operator[]. 1169 // This is appropriate in most cases -- if we're not going to do something 1170 // significantly different based on whether the value is uniform across lanes. 1171 class Operand 1172 { 1173 public: 1174 Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId); 1175 Operand(const Intermediate &value); 1176 Float(uint32_t i) const1177 RValue<SIMD::Float> Float(uint32_t i) const 1178 { 1179 if(intermediate) 1180 { 1181 return intermediate->Float(i); 1182 } 1183 1184 // Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact 1185 // bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant". 1186 // Thus we must first construct an integer constant, and bitcast to float. 1187 return As<SIMD::Float>(SIMD::UInt(constant[i])); 1188 } 1189 Int(uint32_t i) const1190 RValue<SIMD::Int> Int(uint32_t i) const 1191 { 1192 if(intermediate) 1193 { 1194 return intermediate->Int(i); 1195 } 1196 1197 return SIMD::Int(constant[i]); 1198 } 1199 UInt(uint32_t i) const1200 RValue<SIMD::UInt> UInt(uint32_t i) const 1201 { 1202 if(intermediate) 1203 { 1204 return intermediate->UInt(i); 1205 } 1206 1207 return SIMD::UInt(constant[i]); 1208 } 1209 1210 private: 1211 RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;) 1212 1213 // Delegate constructor 1214 Operand(const EmitState *state, const Object &object); 1215 1216 const uint32_t *constant; 1217 const Intermediate *intermediate; 1218 1219 public: 1220 const uint32_t componentCount; 1221 }; 1222 RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)1223 RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;) 1224 1225 Type const &getType(Type::ID id) const 1226 { 1227 auto it = types.find(id); 1228 ASSERT_MSG(it != types.end(), "Unknown type %d", id.value()); 1229 return it->second; 1230 } 1231 getType(const Object & object) const1232 Type const 
&getType(const Object &object) const 1233 { 1234 return getType(object.typeId()); 1235 } 1236 getObject(Object::ID id) const1237 Object const &getObject(Object::ID id) const 1238 { 1239 auto it = defs.find(id); 1240 ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value()); 1241 return it->second; 1242 } 1243 getObjectType(Object::ID id) const1244 Type const &getObjectType(Object::ID id) const 1245 { 1246 return getType(getObject(id)); 1247 } 1248 getFunction(Function::ID id) const1249 Function const &getFunction(Function::ID id) const 1250 { 1251 auto it = functions.find(id); 1252 ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value()); 1253 return it->second; 1254 } 1255 getString(StringID id) const1256 String const &getString(StringID id) const 1257 { 1258 auto it = strings.find(id); 1259 ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value()); 1260 return it->second; 1261 } 1262 getExtension(Extension::ID id) const1263 Extension const &getExtension(Extension::ID id) const 1264 { 1265 auto it = extensionsByID.find(id); 1266 ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value()); 1267 return it->second; 1268 } 1269 1270 // Returns a SIMD::Pointer to the underlying data for the given pointer 1271 // object. 1272 // Handles objects of the following kinds: 1273 // - DescriptorSet 1274 // - Pointer 1275 // - InterfaceVariable 1276 // Calling GetPointerToData with objects of any other kind will assert. 1277 SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const; 1278 1279 OutOfBoundsBehavior getOutOfBoundsBehavior(Object::ID pointerId, EmitState const *state) const; 1280 1281 SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, const Span &indexIds, const EmitState *state) const; 1282 SIMD::Pointer WalkAccessChain(Object::ID id, const Span &indexIds, const EmitState *state) const; 1283 1284 // Returns the *component* offset in the literal for the given access chain. 
	uint32_t WalkLiteralAccessChain(Type::ID id, const Span &indexes) const;

	// Lookup the active lane mask for the edge from -> to.
	// If from is unreachable, then a mask of all zeros is returned.
	// Asserts if from is reachable and the edge does not exist.
	RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;

	// Updates the current active lane mask.
	void SetActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;
	void SetStoresAndAtomicsMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// Emit all the unvisited blocks (except for ignore) in DFS order,
	// starting with id.
	void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
	void EmitNonLoop(EmitState *state) const;
	void EmitLoop(EmitState *state) const;

	// Emits every instruction in [begin, end), then a single instruction.
	void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
	EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;

	// Emit pass instructions:
	// One handler per SPIR-V opcode (or opcode family); each returns whether
	// the instruction terminated the current block.
	EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
	EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOpenCLDebugInfo100(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLine(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
	EmitResult EmitTerminateInvocation(InsnIterator insn, EmitState *state) const;
	EmitResult EmitDemoteToHelperInvocation(InsnIterator insn, EmitState *state) const;
	EmitResult EmitIsHelperInvocation(InsnIterator insn, EmitState *state) const;
	EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
	EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageSample(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageRead(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
	EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
	EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;

	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const;

	// Sampler-routine lookup / invocation helpers.
	Pointer<Byte> lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;
	void callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;

	void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
	static SIMD::Pointer GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state);
	static void WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat);
	uint32_t GetConstScalarInt(Object::ID id) const;
	void EvalSpecConstantOp(InsnIterator insn);
	void EvalSpecConstantUnaryOp(InsnIterator insn);
	void EvalSpecConstantBinaryOp(InsnIterator insn);

	// Fragment input interpolation functions
	uint32_t GetNumInputComponents(int32_t location) const;
	uint32_t GetPackedInterpolant(int32_t location) const;
	enum InterpolationType
	{
		Centroid,
		AtSample,
		AtOffset,
	};
	SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
	                        uint32_t component, EmitState *state, InterpolationType type) const;

	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
	// can also store independent operands.
	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const;

	// LoadPhi loads the phi values from the alloca storage and places the
	// load values into the intermediate with the phi's result id.
	void LoadPhi(InsnIterator insn, EmitState *state) const;

	// StorePhi updates the phi's alloca storage value using the incoming
	// values from blocks that are both in the OpPhi instruction and in
	// filter.
	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;

	// Emits a rr::Fence for the given MemorySemanticsMask.
	void Fence(spv::MemorySemanticsMask semantics) const;

	// Helper for calling rr::Yield with res cast to an rr::Int.
	void Yield(YieldResult res) const;

	// WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
	// control flow to the given file path.
	void WriteCFGGraphVizDotFile(const char *path) const;

	// OpcodeName() returns the name of the opcode op.
	static const char *OpcodeName(spv::Op op);
	// Maps SPIR-V memory semantics onto a C++ std::memory_order.
	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

	// IsStatement() returns true if the given opcode actually performs
	// work (as opposed to declaring a type, defining a function start / end,
	// etc).
	static bool IsStatement(spv::Op op);

	// HasTypeAndResult() returns true if the given opcode's instruction
	// has a result type ID and result ID, i.e. defines an Object.
	static bool HasTypeAndResult(spv::Op op);

	// Helper as we often need to take dot products as part of doing other things.
	static SIMD::Float FDot(unsigned numComponents, Operand const &x, Operand const &y);
	static SIMD::Int SDot(unsigned numComponents, Operand const &x, Operand const &y, Operand const *accum);
	static SIMD::UInt UDot(unsigned numComponents, Operand const &x, Operand const &y, Operand const *accum);
	static SIMD::Int SUDot(unsigned numComponents, Operand const &x, Operand const &y, Operand const *accum);
	// Saturating addition (signed and unsigned variants).
	static SIMD::Int AddSat(RValue<SIMD::Int> a, RValue<SIMD::Int> b);
	static SIMD::UInt AddSat(RValue<SIMD::UInt> a, RValue<SIMD::UInt> b);

	// Splits x into a floating-point significand in the range [0.5, 1.0)
	// and an integral exponent of two, such that:
	//   x = significand * 2^exponent
	// Returns the pair <significand, exponent>
	std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;

	static ImageSampler *getImageSampler(const vk::Device *device, uint32_t signature, uint32_t samplerId, uint32_t imageViewId);
	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
	static std::shared_ptr<rr::Routine> emitWriteRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);

	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
	static sw::FilterType convertFilterMode(const vk::SamplerState *samplerState, VkImageViewType imageViewType, SamplerMethod samplerMethod);
	static sw::MipmapType convertMipmapMode(const vk::SamplerState *samplerState);
	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::SamplerState *samplerState, VkImageViewType imageViewType);

	// Returns 0 when invalid.
	static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);

	// Debugger API functions. When ENABLE_VK_DEBUGGER is not defined, these
	// are all no-ops.

	// dbgInit() initializes the debugger code generation.
	// All other dbgXXX() functions are no-op until this is called.
	void dbgInit(const std::shared_ptr<vk::dbg::Context> &dbgctx);

	// dbgTerm() terminates the debugger code generation.
	void dbgTerm();

	// dbgCreateFile() generates a synthetic file containing the disassembly
	// of the SPIR-V shader. This is the file displayed in the debug
	// session.
	void dbgCreateFile();

	// dbgBeginEmit() sets up the debugging state for the shader.
	void dbgBeginEmit(EmitState *state) const;

	// dbgEndEmit() tears down the debugging state for the shader.
	void dbgEndEmit(EmitState *state) const;

	// dbgBeginEmitInstruction() updates the current debugger location for
	// the given instruction.
	void dbgBeginEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgEndEmitInstruction() creates any new debugger variables for the
	// instruction that just completed.
	void dbgEndEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgExposeIntermediate() exposes the intermediate with the given ID to
	// the debugger.
	void dbgExposeIntermediate(Object::ID id, EmitState *state) const;

	// dbgUpdateActiveLaneMask() updates the active lane masks to the
	// debugger.
	void dbgUpdateActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// dbgDeclareResult() associates resultId as the result of the given
	// instruction.
	void dbgDeclareResult(const InsnIterator &insn, Object::ID resultId) const;

	// Impl holds forward declaration structs and pointers to state for the
	// private implementations in the corresponding SpirvShaderXXX.cpp files.
	// This allows access to the private members of the SpirvShader, without
	// littering the header with implementation details.
1493 struct Impl 1494 { 1495 struct Debugger; 1496 struct Group; 1497 Debugger *debugger = nullptr; 1498 }; 1499 Impl impl; 1500 }; 1501 1502 class SpirvRoutine 1503 { 1504 public: 1505 SpirvRoutine(vk::PipelineLayout const *pipelineLayout); 1506 1507 using Variable = Array<SIMD::Float>; 1508 1509 // Single-entry 'inline' sampler routine cache. 1510 struct SamplerCache 1511 { 1512 Pointer<Byte> imageDescriptor = nullptr; 1513 Int samplerId; 1514 1515 Pointer<Byte> function; 1516 }; 1517 1518 struct InterpolationData 1519 { 1520 Pointer<Byte> primitive; 1521 SIMD::Float x; 1522 SIMD::Float y; 1523 SIMD::Float rhw; 1524 SIMD::Float xCentroid; 1525 SIMD::Float yCentroid; 1526 SIMD::Float rhwCentroid; 1527 }; 1528 1529 vk::PipelineLayout const *const pipelineLayout; 1530 1531 std::unordered_map<SpirvShader::Object::ID, Variable> variables; 1532 std::unordered_map<uint32_t, SamplerCache> samplerCache; // Indexed by the instruction position, in words. 1533 SIMD::Float inputs[MAX_INTERFACE_COMPONENTS]; 1534 SIMD::Float outputs[MAX_INTERFACE_COMPONENTS]; 1535 InterpolationData interpolationData; 1536 1537 Pointer<Byte> device; 1538 Pointer<Byte> workgroupMemory; 1539 Pointer<Pointer<Byte>> descriptorSets; 1540 Pointer<Int> descriptorDynamicOffsets; 1541 Pointer<Byte> pushConstants; 1542 Pointer<Byte> constants; 1543 Int discardMask = 0; 1544 1545 // Shader invocation state. 1546 // Not all of these variables are used for every type of shader, and some 1547 // are only used when debugging. See b/146486064 for more information. 1548 // Give careful consideration to the runtime performance loss before adding 1549 // more state here. 
1550 std::array<SIMD::Int, 2> windowSpacePosition; 1551 Int layer; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex 1552 Int instanceID; 1553 SIMD::Int vertexIndex; 1554 std::array<SIMD::Float, 4> fragCoord; 1555 std::array<SIMD::Float, 4> pointCoord; 1556 SIMD::Int helperInvocation; 1557 Int4 numWorkgroups; 1558 Int4 workgroupID; 1559 Int4 workgroupSize; 1560 Int subgroupsPerWorkgroup; 1561 Int invocationsPerSubgroup; 1562 Int subgroupIndex; 1563 SIMD::Int localInvocationIndex; 1564 std::array<SIMD::Int, 3> localInvocationID; 1565 std::array<SIMD::Int, 3> globalInvocationID; 1566 1567 Pointer<Byte> dbgState; // Pointer to a debugger state. 1568 createVariable(SpirvShader::Object::ID id,uint32_t componentCount)1569 void createVariable(SpirvShader::Object::ID id, uint32_t componentCount) 1570 { 1571 bool added = variables.emplace(id, Variable(componentCount)).second; 1572 ASSERT_MSG(added, "Variable %d created twice", id.value()); 1573 } 1574 getVariable(SpirvShader::Object::ID id)1575 Variable &getVariable(SpirvShader::Object::ID id) 1576 { 1577 auto it = variables.find(id); 1578 ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value()); 1579 return it->second; 1580 } 1581 1582 // setImmutableInputBuiltins() sets all the immutable input builtins, 1583 // common for all shader types. 1584 void setImmutableInputBuiltins(SpirvShader const *shader); 1585 1586 static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective); 1587 1588 // setInputBuiltin() calls f() with the builtin and value if the shader 1589 // uses the input builtin, otherwise the call is a no-op. 
1590 // F is a function with the signature: 1591 // void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) 1592 template<typename F> setInputBuiltin(SpirvShader const * shader,spv::BuiltIn id,F && f)1593 inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f) 1594 { 1595 auto it = shader->inputBuiltins.find(id); 1596 if(it != shader->inputBuiltins.end()) 1597 { 1598 const auto &builtin = it->second; 1599 f(builtin, getVariable(builtin.Id)); 1600 } 1601 } 1602 1603 private: 1604 // The phis and the profile data are only accessible to SpirvShader 1605 // as they are only used and exist between calls to 1606 // SpirvShader::emitProlog() and SpirvShader::emitEpilog(). 1607 friend class SpirvShader; 1608 1609 std::unordered_map<SpirvShader::Object::ID, Variable> phis; 1610 std::unique_ptr<SpirvProfileData> profData; 1611 }; 1612 1613 } // namespace sw 1614 1615 #endif // sw_SpirvShader_hpp 1616