1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_SpirvShader_hpp
16 #define sw_SpirvShader_hpp
17 
18 #include "SamplerCore.hpp"
19 #include "ShaderCore.hpp"
20 #include "SpirvBinary.hpp"
21 #include "SpirvID.hpp"
22 #include "SpirvProfiler.hpp"
23 #include "Device/Config.hpp"
24 #include "Device/Sampler.hpp"
25 #include "System/Debug.hpp"
26 #include "System/Math.hpp"
27 #include "System/Types.hpp"
28 #include "Vulkan/VkConfig.hpp"
29 #include "Vulkan/VkDescriptorSet.hpp"
30 
31 #define SPV_ENABLE_UTILITY_CODE
32 #include <spirv/unified1/spirv.hpp>
33 
34 #include <array>
35 #include <atomic>
36 #include <cstdint>
37 #include <cstring>
38 #include <deque>
39 #include <functional>
40 #include <memory>
41 #include <string>
42 #include <type_traits>
43 #include <unordered_map>
44 #include <unordered_set>
45 #include <vector>
46 
47 #undef Yield  // b/127920555
48 
49 namespace vk {
50 
51 class Device;
52 class PipelineLayout;
53 class ImageView;
54 class Sampler;
55 class RenderPass;
56 struct SampledImageDescriptor;
57 struct SamplerState;
58 
59 namespace dbg {
60 class Context;
61 }  // namespace dbg
62 
63 }  // namespace vk
64 
65 namespace sw {
66 
67 // Forward declarations.
68 class SpirvRoutine;
69 
70 // Incrementally constructed complex bundle of rvalues
71 // Effectively a restricted vector, supporting only:
72 // - allocation to a (runtime-known) fixed component count
73 // - in-place construction of elements
74 // - const operator[]
75 class Intermediate
76 {
77 public:
78 	Intermediate(uint32_t componentCount)
79 	    : componentCount(componentCount)
80 	    , scalar(new rr::Value *[componentCount])
81 	{
82 		for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; }
83 	}
84 
85 	~Intermediate()
86 	{
87 		delete[] scalar;
88 	}
89 
90 	// TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to
91 	// decide the format used to print the intermediate data.
92 	enum class TypeHint
93 	{
94 		Float,
95 		Int,
96 		UInt
97 	};
98 
99 	void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); }
100 	void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); }
101 	void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
102 
103 	void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); }
104 	void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); }
105 	void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
106 
107 	// Value retrieval functions.
108 	RValue<SIMD::Float> Float(uint32_t i) const
109 	{
110 		ASSERT(i < componentCount);
111 		ASSERT(scalar[i] != nullptr);
112 		return As<SIMD::Float>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Float>(scalar)
113 	}
114 
115 	RValue<SIMD::Int> Int(uint32_t i) const
116 	{
117 		ASSERT(i < componentCount);
118 		ASSERT(scalar[i] != nullptr);
119 		return As<SIMD::Int>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Int>(scalar)
120 	}
121 
122 	RValue<SIMD::UInt> UInt(uint32_t i) const
123 	{
124 		ASSERT(i < componentCount);
125 		ASSERT(scalar[i] != nullptr);
126 		return As<SIMD::UInt>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
127 	}
128 
129 	// No copy/move construction or assignment
130 	Intermediate(Intermediate const &) = delete;
131 	Intermediate(Intermediate &&) = delete;
132 	Intermediate &operator=(Intermediate const &) = delete;
133 	Intermediate &operator=(Intermediate &&) = delete;
134 
135 	const uint32_t componentCount;
136 
137 private:
138 	void emplace(uint32_t i, rr::Value *value, TypeHint type)
139 	{
140 		ASSERT(i < componentCount);
141 		ASSERT(scalar[i] == nullptr);
142 		scalar[i] = value;
143 		RR_PRINT_ONLY(typeHint = type;)
144 	}
145 
146 	rr::Value **const scalar;
147 
148 #ifdef ENABLE_RR_PRINT
149 	friend struct rr::PrintValue::Ty<sw::Intermediate>;
150 	TypeHint typeHint = TypeHint::Float;
151 #endif  // ENABLE_RR_PRINT
152 };
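// Usage sketch (illustrative only, assuming a Reactor code-generation context
// where SIMD::Float expressions are valid): each component is written exactly
// once with move(), then read back as the desired scalar type.
//
//   Intermediate vec(4);                    // fixed component count
//   for(uint32_t i = 0; i < 4; i++)
//   {
//       vec.move(i, SIMD::Float(0.0f));     // in-place construction, once per slot
//   }
//   RValue<SIMD::Float> x = vec.Float(0);   // const access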
153 
154 class SpirvShader
155 {
156 public:
157 	SpirvBinary insns;
158 
159 	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);
160 
161 	enum class YieldResult
162 	{
163 		ControlBarrier,
164 	};
165 
166 	class Type;
167 	class Object;
168 
169 	// Pseudo-iterator over SPIR-V instructions, designed to support range-based-for.
170 	class InsnIterator
171 	{
172 	public:
173 		InsnIterator() = default;
174 		InsnIterator(InsnIterator const &other) = default;
175 		InsnIterator &operator=(const InsnIterator &other) = default;
176 
177 		explicit InsnIterator(SpirvBinary::const_iterator iter)
178 		    : iter{ iter }
179 		{
180 		}
181 
182 		spv::Op opcode() const
183 		{
184 			return static_cast<spv::Op>(*iter & spv::OpCodeMask);
185 		}
186 
187 		uint32_t wordCount() const
188 		{
189 			return *iter >> spv::WordCountShift;
190 		}
191 
192 		uint32_t word(uint32_t n) const
193 		{
194 			ASSERT(n < wordCount());
195 			return iter[n];
196 		}
197 
198 		const uint32_t *data() const
199 		{
200 			return &iter[0];
201 		}
202 
203 		const char *string(uint32_t n) const
204 		{
205 			return reinterpret_cast<const char *>(&iter[n]);
206 		}
207 
208 		// Returns the number of whole words that a string literal starting at
209 		// word n consumes. If the end of the instruction is reached before the
210 		// null-terminator is found, then the function DABORT()s and 0 is
211 		// returned.
212 		uint32_t stringSizeInWords(uint32_t n) const
213 		{
214 			uint32_t c = wordCount();
215 			for(uint32_t i = n; i < c; i++)
216 			{
217 				const char *s = string(i);
218 				// SPIR-V spec 2.2.1. Instructions:
219 				// A string is interpreted as a nul-terminated stream of
220 				// characters. The character set is Unicode in the UTF-8
221 				// encoding scheme. The UTF-8 octets (8-bit bytes) are packed
222 				// four per word, following the little-endian convention (i.e.,
223 				// the first octet is in the lowest-order 8 bits of the word).
224 				// The final word contains the string's nul-termination
225 				// character (0), and all contents past the end of the string in
226 				// the final word are padded with 0.
227 				if(s[3] == 0)
228 				{
229 					return 1 + i - n;
230 				}
231 			}
232 			DABORT("SPIR-V string literal was not null-terminated");
233 			return 0;
234 		}
235 
236 		bool hasResultAndType() const
237 		{
238 			bool hasResult = false, hasResultType = false;
239 			spv::HasResultAndType(opcode(), &hasResult, &hasResultType);
240 
241 			return hasResultType;
242 		}
243 
244 		SpirvID<Type> resultTypeId() const
245 		{
246 			ASSERT(hasResultAndType());
247 			return word(1);
248 		}
249 
250 		SpirvID<Object> resultId() const
251 		{
252 			ASSERT(hasResultAndType());
253 			return word(2);
254 		}
255 
256 		uint32_t distanceFrom(const InsnIterator &other) const
257 		{
258 			return static_cast<uint32_t>(iter - other.iter);
259 		}
260 
261 		bool operator==(InsnIterator const &other) const
262 		{
263 			return iter == other.iter;
264 		}
265 
266 		bool operator!=(InsnIterator const &other) const
267 		{
268 			return iter != other.iter;
269 		}
270 
271 		InsnIterator operator*() const
272 		{
273 			return *this;
274 		}
275 
276 		InsnIterator &operator++()
277 		{
278 			iter += wordCount();
279 			return *this;
280 		}
281 
282 		InsnIterator const operator++(int)
283 		{
284 			InsnIterator ret{ *this };
285 			iter += wordCount();
286 			return ret;
287 		}
288 
289 	private:
290 		SpirvBinary::const_iterator iter;
291 	};
292 
293 	// Range-based-for interface
294 	InsnIterator begin() const
295 	{
296 		// Skip over the header words
297 		return InsnIterator{ insns.cbegin() + 5 };
298 	}
299 
300 	InsnIterator end() const
301 	{
302 		return InsnIterator{ insns.cend() };
303 	}
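// Iteration sketch (illustrative only; 'shader' is an already-parsed
// SpirvShader, and processEntryPoint() is a hypothetical helper):
//
//   for(auto insn : shader)
//   {
//       if(insn.opcode() == spv::OpEntryPoint)
//       {
//           // Word 1: execution model, word 2: entry point id,
//           // words 3 onward: the literal name string.
//           processEntryPoint(insn.word(1), insn.word(2), insn.string(3));
//       }
//       // operator++ advances by insn.wordCount() words.
//   }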
304 
305 	// A range of contiguous instruction words.
306 	struct Span
307 	{
308 		Span(const InsnIterator &insn, uint32_t offset, uint32_t size)
309 		    : insn(insn)
310 		    , offset(offset)
311 		    , wordCount(size)
312 		{}
313 
314 		uint32_t operator[](uint32_t index) const
315 		{
316 			ASSERT(index < wordCount);
317 			return insn.word(offset + index);
318 		}
319 
320 		uint32_t size() const
321 		{
322 			return wordCount;
323 		}
324 
325 	private:
326 		const InsnIterator &insn;
327 		const uint32_t offset;
328 		const uint32_t wordCount;
329 	};
330 
331 	class Type
332 	{
333 	public:
334 		using ID = SpirvID<Type>;
335 
336 		spv::Op opcode() const { return definition.opcode(); }
337 
338 		InsnIterator definition;
339 		spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
340 		uint32_t componentCount = 0;
341 		bool isBuiltInBlock = false;
342 
343 		// Inner element type for pointers, arrays, vectors and matrices.
344 		ID element;
345 	};
346 
347 	class Object
348 	{
349 	public:
350 		using ID = SpirvID<Object>;
351 
352 		spv::Op opcode() const { return definition.opcode(); }
353 		Type::ID typeId() const { return definition.resultTypeId(); }
354 		Object::ID id() const { return definition.resultId(); }
355 
356 		bool isConstantZero() const;
357 
358 		InsnIterator definition;
359 		std::vector<uint32_t> constantValue;
360 
361 		enum class Kind
362 		{
363 			// Invalid default kind.
364 			// If we get left with an object in this state, the module was
365 			// broken.
366 			Unknown,
367 
368 			// TODO: Better document this kind.
369 			// A shader interface variable pointer.
370 			// Pointer with uniform address across all lanes.
371 			// Pointer held by SpirvRoutine::pointers
372 			InterfaceVariable,
373 
374 			// Constant value held by Object::constantValue.
375 			Constant,
376 
377 			// Value held by SpirvRoutine::intermediates.
378 			Intermediate,
379 
380 			// Pointer held by SpirvRoutine::pointers
381 			Pointer,
382 
383 			// A pointer to a vk::DescriptorSet*.
384 			// Pointer held by SpirvRoutine::pointers.
385 			DescriptorSet,
386 		};
387 
388 		Kind kind = Kind::Unknown;
389 	};
390 
391 	// Block is an interval of SPIR-V instructions, starting with the
392 	// opening OpLabel, and ending with a termination instruction.
393 	class Block
394 	{
395 	public:
396 		using ID = SpirvID<Block>;
397 		using Set = std::unordered_set<ID>;
398 
399 		// Edge represents the graph edge between two blocks.
400 		struct Edge
401 		{
402 			ID from;
403 			ID to;
404 
405 			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }
406 
407 			struct Hash
408 			{
409 				std::size_t operator()(const Edge &edge) const noexcept
410 				{
411 					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
412 				}
413 			};
414 		};
415 
416 		Block() = default;
417 		Block(const Block &other) = default;
418 		Block &operator=(const Block &other) = default;
419 		explicit Block(InsnIterator begin, InsnIterator end);
420 
421 		/* range-based-for interface */
422 		inline InsnIterator begin() const { return begin_; }
423 		inline InsnIterator end() const { return end_; }
424 
425 		enum Kind
426 		{
427 			Simple,                         // OpBranch or other simple terminator.
428 			StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
429 			UnstructuredBranchConditional,  // OpBranchConditional
430 			StructuredSwitch,               // OpSelectionMerge + OpSwitch
431 			UnstructuredSwitch,             // OpSwitch
432 			Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
433 		};
434 
435 		Kind kind = Simple;
436 		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
437 		InsnIterator branchInstruction;  // Branch instruction.
438 		ID mergeBlock;                   // Structured flow merge block.
439 		ID continueTarget;               // Loop continue block.
440 		Set ins;                         // Blocks that branch into this block.
441 		Set outs;                        // Blocks that this block branches to.
442 		bool isLoopMerge = false;
443 
444 	private:
445 		InsnIterator begin_;
446 		InsnIterator end_;
447 	};
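// The Edge::Hash functor lets edges key hash containers; for example, the
// emit pass keeps one active lane mask per control-flow edge (see
// EmitState::edgeActiveLaneMasks below). A minimal sketch:
//
//   std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeMasks;
//   edgeMasks.emplace(Block::Edge{ from, to }, mask);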
448 
449 	class Function
450 	{
451 	public:
452 		using ID = SpirvID<Function>;
453 
454 		// Walks all the blocks reachable from id, adding them to
455 		// reachable.
456 		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;
457 
458 		// AssignBlockFields() performs the following for all reachable blocks:
459 		// * Assigns Block::ins with the identifiers of all blocks that contain
460 		//   this block in their Block::outs.
461 		// * Sets Block::isLoopMerge to true if the block is the merge block
462 		//   of another loop.
463 		void AssignBlockFields();
464 
465 		// ForeachBlockDependency calls f with each dependency of the given
466 		// block. A dependency is an incoming block that is not a loop-back
467 		// edge.
468 		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;
469 
470 		// ExistsPath returns true if there's a direct or indirect flow from
471 		// the 'from' block to the 'to' block that does not pass through
472 		// notPassingThrough.
473 		bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;
474 
475 		Block const &getBlock(Block::ID id) const
476 		{
477 			auto it = blocks.find(id);
478 			ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
479 			return it->second;
480 		}
481 
482 		Block::ID entry;          // function entry point block.
483 		HandleMap<Block> blocks;  // blocks belonging to this function.
484 		Type::ID type;            // type of the function.
485 		Type::ID result;          // return type.
486 	};
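// One way the reachability walk could look (a sketch only, not necessarily
// the exact implementation in SpirvShader.cpp): a depth-first search over
// Block::outs, with 'reachable' doubling as the visited set.
//
//   void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const
//   {
//       if(reachable.count(id) == 0)
//       {
//           reachable.emplace(id);
//           for(auto out : getBlock(id).outs)
//           {
//               TraverseReachableBlocks(out, reachable);
//           }
//       }
//   }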
487 
488 	using String = std::string;
489 	using StringID = SpirvID<std::string>;
490 
491 	class Extension
492 	{
493 	public:
494 		using ID = SpirvID<Extension>;
495 
496 		enum Name
497 		{
498 			Unknown,
499 			GLSLstd450,
500 			OpenCLDebugInfo100,
501 			NonSemanticInfo,
502 		};
503 
504 		Name name;
505 	};
506 
507 	struct TypeOrObject
508 	{};
509 
510 	// TypeOrObjectID is an identifier that represents a Type or an Object,
511 	// and supports implicit casting to and from Type::ID or Object::ID.
512 	class TypeOrObjectID : public SpirvID<TypeOrObject>
513 	{
514 	public:
515 		using Hash = std::hash<SpirvID<TypeOrObject>>;
516 
517 		inline TypeOrObjectID(uint32_t id)
518 		    : SpirvID(id)
519 		{}
520 		inline TypeOrObjectID(Type::ID id)
521 		    : SpirvID(id.value())
522 		{}
523 		inline TypeOrObjectID(Object::ID id)
524 		    : SpirvID(id.value())
525 		{}
526 		inline operator Type::ID() const { return Type::ID(value()); }
527 		inline operator Object::ID() const { return Object::ID(value()); }
528 	};
529 
530 	// OpImageSample variants
531 	enum Variant : uint32_t
532 	{
533 		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
534 		Dref,
535 		Proj,
536 		ProjDref,
537 		VARIANT_LAST = ProjDref
538 	};
539 
540 	// Compact representation of image instruction state that is passed to the
541 	// trampoline function for retrieving/generating the corresponding sampling routine.
542 	struct ImageInstructionSignature
543 	{
544 		ImageInstructionSignature(Variant variant, SamplerMethod samplerMethod)
545 		{
546 			this->variant = variant;
547 			this->samplerMethod = samplerMethod;
548 		}
549 
550 		// Unmarshal from raw 32-bit data
551 		explicit ImageInstructionSignature(uint32_t signature)
552 		    : signature(signature)
553 		{}
554 
555 		SamplerFunction getSamplerFunction() const
556 		{
557 			return { samplerMethod, offset != 0, sample != 0 };
558 		}
559 
560 		bool isDref() const
561 		{
562 			return (variant == Dref) || (variant == ProjDref);
563 		}
564 
565 		bool isProj() const
566 		{
567 			return (variant == Proj) || (variant == ProjDref);
568 		}
569 
570 		bool hasLod() const
571 		{
572 			return samplerMethod == Lod || samplerMethod == Fetch;  // We always pass a Lod operand for Fetch operations.
573 		}
574 
575 		bool hasGrad() const
576 		{
577 			return samplerMethod == Grad;
578 		}
579 
580 		union
581 		{
582 			struct
583 			{
584 				Variant variant : BITS(VARIANT_LAST);
585 				SamplerMethod samplerMethod : BITS(SAMPLER_METHOD_LAST);
586 				uint32_t gatherComponent : 2;
587 				uint32_t dim : BITS(spv::DimSubpassData);  // spv::Dim
588 				uint32_t arrayed : 1;
589 				uint32_t imageFormat : BITS(spv::ImageFormatR64i);  // spv::ImageFormat
590 
591 				// Parameters are passed to the sampling routine in this order:
592 				uint32_t coordinates : 3;       // 1-4 (does not contain projection component)
593 				/*	uint32_t dref : 1; */       // Indicated by Variant::ProjDref|Dref
594 				/*	uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
595 				uint32_t grad : 2;              // 0-3 components (for each of dx / dy)
596 				uint32_t offset : 2;            // 0-3 components
597 				uint32_t sample : 1;            // 0-1 scalar integer
598 			};
599 
600 			uint32_t signature = 0;
601 		};
602 	};
603 
604 	// This gets stored as a literal in the generated code, so it should be compact.
605 	static_assert(sizeof(ImageInstructionSignature) == sizeof(uint32_t), "ImageInstructionSignature must be 32-bit");
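// Packing sketch (illustrative only): the bitfield state round-trips through
// the 32-bit 'signature' word, which is how it can be embedded as a literal
// in generated code and later unmarshalled by the trampoline.
//
//   ImageInstructionSignature sig(Dref, Lod);  // depth-compare sample with explicit LOD
//   sig.coordinates = 3;                       // e.g. a 2D array coordinate
//   uint32_t packed = sig.signature;           // a single literal word
//
//   ImageInstructionSignature unpacked(packed);
//   ASSERT(unpacked.isDref() && unpacked.hasLod());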
606 
607 	struct ImageInstruction : public ImageInstructionSignature
608 	{
609 		ImageInstruction(InsnIterator insn, const SpirvShader &spirv);
610 
611 		const uint32_t position;
612 
613 		Type::ID resultTypeId = 0;
614 		Object::ID resultId = 0;
615 		Object::ID imageId = 0;
616 		Object::ID samplerId = 0;
617 		Object::ID coordinateId = 0;
618 		Object::ID texelId = 0;
619 		Object::ID drefId = 0;
620 		Object::ID lodOrBiasId = 0;
621 		Object::ID gradDxId = 0;
622 		Object::ID gradDyId = 0;
623 		Object::ID offsetId = 0;
624 		Object::ID sampleId = 0;
625 
626 	private:
627 		static ImageInstructionSignature parseVariantAndMethod(InsnIterator insn);
628 		static uint32_t getImageOperandsIndex(InsnIterator insn);
629 		static uint32_t getImageOperandsMask(InsnIterator insn);
630 	};
631 
632 	// This method is for retrieving an ID that uniquely identifies the
633 	// shader entry point represented by this object.
634 	uint64_t getIdentifier() const
635 	{
636 		return ((uint64_t)entryPoint.value() << 32) | insns.getIdentifier();
637 	}
638 
639 	SpirvShader(VkShaderStageFlagBits stage,
640 	            const char *entryPointName,
641 	            SpirvBinary const &insns,
642 	            const vk::RenderPass *renderPass,
643 	            uint32_t subpassIndex,
644 	            bool robustBufferAccess,
645 	            const std::shared_ptr<vk::dbg::Context> &dbgctx,
646 	            std::shared_ptr<SpirvProfiler> profiler);
647 
648 	~SpirvShader();
649 
650 	struct ExecutionModes
651 	{
652 		bool EarlyFragmentTests : 1;
653 		bool DepthReplacing : 1;
654 		bool DepthGreater : 1;
655 		bool DepthLess : 1;
656 		bool DepthUnchanged : 1;
657 
658 		// Compute workgroup dimensions
659 		Object::ID WorkgroupSizeX = 1;
660 		Object::ID WorkgroupSizeY = 1;
661 		Object::ID WorkgroupSizeZ = 1;
662 		bool useWorkgroupSizeId = false;
663 	};
664 
665 	const ExecutionModes &getExecutionModes() const
666 	{
667 		return executionModes;
668 	}
669 
670 	struct Analysis
671 	{
672 		bool ContainsDiscard : 1;  // OpKill, OpTerminateInvocation, or OpDemoteToHelperInvocation
673 		bool ContainsControlBarriers : 1;
674 		bool NeedsCentroid : 1;
675 		bool ContainsSampleQualifier : 1;
676 	};
677 
678 	const Analysis &getAnalysis() const
679 	{
680 		return analysis;
681 	}
682 
683 	struct Capabilities
684 	{
685 		bool Matrix : 1;
686 		bool Shader : 1;
687 		bool StorageImageMultisample : 1;
688 		bool ClipDistance : 1;
689 		bool CullDistance : 1;
690 		bool ImageCubeArray : 1;
691 		bool SampleRateShading : 1;
692 		bool InputAttachment : 1;
693 		bool Sampled1D : 1;
694 		bool Image1D : 1;
695 		bool SampledBuffer : 1;
696 		bool SampledCubeArray : 1;
697 		bool ImageBuffer : 1;
698 		bool ImageMSArray : 1;
699 		bool StorageImageExtendedFormats : 1;
700 		bool ImageQuery : 1;
701 		bool DerivativeControl : 1;
702 		bool DotProductInputAll : 1;
703 		bool DotProductInput4x8Bit : 1;
704 		bool DotProductInput4x8BitPacked : 1;
705 		bool DotProduct : 1;
706 		bool InterpolationFunction : 1;
707 		bool StorageImageWriteWithoutFormat : 1;
708 		bool GroupNonUniform : 1;
709 		bool GroupNonUniformVote : 1;
710 		bool GroupNonUniformBallot : 1;
711 		bool GroupNonUniformShuffle : 1;
712 		bool GroupNonUniformShuffleRelative : 1;
713 		bool GroupNonUniformArithmetic : 1;
714 		bool DeviceGroup : 1;
715 		bool MultiView : 1;
716 		bool DemoteToHelperInvocation : 1;
717 		bool StencilExportEXT : 1;
718 		bool VulkanMemoryModel : 1;
719 		bool VulkanMemoryModelDeviceScope : 1;
720 	};
721 
722 	const Capabilities &getUsedCapabilities() const
723 	{
724 		return capabilities;
725 	}
726 
727 	// getNumOutputClipDistances() returns the number of ClipDistances
728 	// output by this shader.
729 	unsigned int getNumOutputClipDistances() const
730 	{
731 		if(getUsedCapabilities().ClipDistance)
732 		{
733 			auto it = outputBuiltins.find(spv::BuiltInClipDistance);
734 			if(it != outputBuiltins.end())
735 			{
736 				return it->second.SizeInComponents;
737 			}
738 		}
739 		return 0;
740 	}
741 
742 	// getNumOutputCullDistances() returns the number of CullDistances
743 	// output by this shader.
744 	unsigned int getNumOutputCullDistances() const
745 	{
746 		if(getUsedCapabilities().CullDistance)
747 		{
748 			auto it = outputBuiltins.find(spv::BuiltInCullDistance);
749 			if(it != outputBuiltins.end())
750 			{
751 				return it->second.SizeInComponents;
752 			}
753 		}
754 		return 0;
755 	}
756 
757 	enum AttribType : unsigned char
758 	{
759 		ATTRIBTYPE_FLOAT,
760 		ATTRIBTYPE_INT,
761 		ATTRIBTYPE_UINT,
762 		ATTRIBTYPE_UNUSED,
763 
764 		ATTRIBTYPE_LAST = ATTRIBTYPE_UINT
765 	};
766 
767 	bool hasBuiltinInput(spv::BuiltIn b) const
768 	{
769 		return inputBuiltins.find(b) != inputBuiltins.end();
770 	}
771 
772 	bool hasBuiltinOutput(spv::BuiltIn b) const
773 	{
774 		return outputBuiltins.find(b) != outputBuiltins.end();
775 	}
776 
777 	struct Decorations
778 	{
779 		int32_t Location = -1;
780 		int32_t Component = 0;
781 		spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1);
782 		int32_t Offset = -1;
783 		int32_t ArrayStride = -1;
784 		int32_t MatrixStride = -1;
785 
786 		bool HasLocation : 1;
787 		bool HasComponent : 1;
788 		bool HasBuiltIn : 1;
789 		bool HasOffset : 1;
790 		bool HasArrayStride : 1;
791 		bool HasMatrixStride : 1;
792 		bool HasRowMajor : 1;  // whether RowMajor bit is valid.
793 
794 		bool Flat : 1;
795 		bool Centroid : 1;
796 		bool NoPerspective : 1;
797 		bool Block : 1;
798 		bool BufferBlock : 1;
799 		bool RelaxedPrecision : 1;
800 		bool RowMajor : 1;      // RowMajor if true; ColMajor if false
801 		bool InsideMatrix : 1;  // pseudo-decoration for whether we're inside a matrix.
802 
803 		Decorations()
804 		    : Location{ -1 }
805 		    , Component{ 0 }
806 		    , BuiltIn{ static_cast<spv::BuiltIn>(-1) }
807 		    , Offset{ -1 }
808 		    , ArrayStride{ -1 }
809 		    , MatrixStride{ -1 }
810 		    , HasLocation{ false }
811 		    , HasComponent{ false }
812 		    , HasBuiltIn{ false }
813 		    , HasOffset{ false }
814 		    , HasArrayStride{ false }
815 		    , HasMatrixStride{ false }
816 		    , HasRowMajor{ false }
817 		    , Flat{ false }
818 		    , Centroid{ false }
819 		    , NoPerspective{ false }
820 		    , Block{ false }
821 		    , BufferBlock{ false }
822 		    , RelaxedPrecision{ false }
823 		    , RowMajor{ false }
824 		    , InsideMatrix{ false }
825 		{
826 		}
827 
828 		Decorations(Decorations const &) = default;
829 
830 		void Apply(Decorations const &src);
831 
832 		void Apply(spv::Decoration decoration, uint32_t arg);
833 	};
834 
835 	std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations;
836 	std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations;
837 
838 	struct DescriptorDecorations
839 	{
840 		int32_t DescriptorSet = -1;
841 		int32_t Binding = -1;
842 		int32_t InputAttachmentIndex = -1;
843 
844 		void Apply(DescriptorDecorations const &src);
845 	};
846 
847 	std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations;
848 	std::vector<vk::Format> inputAttachmentFormats;
849 
850 	struct InterfaceComponent
851 	{
852 		AttribType Type;
853 
854 		union
855 		{
856 			struct
857 			{
858 				bool Flat : 1;
859 				bool Centroid : 1;
860 				bool NoPerspective : 1;
861 			};
862 
863 			uint8_t DecorationBits;
864 		};
865 
866 		InterfaceComponent()
867 		    : Type{ ATTRIBTYPE_UNUSED }
868 		    , DecorationBits{ 0 }
869 		{
870 		}
871 	};
872 
873 	struct BuiltinMapping
874 	{
875 		Object::ID Id;
876 		uint32_t FirstComponent;
877 		uint32_t SizeInComponents;
878 	};
879 
880 	struct WorkgroupMemory
881 	{
882 		// allocates a new variable of size bytes with the given identifier.
883 		inline void allocate(Object::ID id, uint32_t size)
884 		{
885 			uint32_t offset = totalSize;
886 			auto it = offsets.emplace(id, offset);
887 			ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
888 			totalSize += size;
889 		}
890 		// returns the byte offset of the variable with the given identifier.
891 		inline uint32_t offsetOf(Object::ID id) const
892 		{
893 			auto it = offsets.find(id);
894 			ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
895 			return it->second;
896 		}
897 		// returns the total allocated size in bytes.
898 		inline uint32_t size() const { return totalSize; }
899 
900 	private:
901 		uint32_t totalSize = 0;                            // in bytes
902 		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
903 	};
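// Usage sketch (illustrative only): variables are laid out back-to-back in
// allocation order, so offsets are simply running byte totals.
//
//   WorkgroupMemory memory;
//   memory.allocate(Object::ID(7), 64);                 // occupies bytes [0, 64)
//   memory.allocate(Object::ID(9), 16);                 // occupies bytes [64, 80)
//   uint32_t offset = memory.offsetOf(Object::ID(9));   // 64
//   uint32_t total = memory.size();                     // 80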
904 
905 	std::vector<InterfaceComponent> inputs;
906 	std::vector<InterfaceComponent> outputs;
907 
908 	void emitProlog(SpirvRoutine *routine) const;
909 	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
910 	void emitEpilog(SpirvRoutine *routine) const;
911 	void clearPhis(SpirvRoutine *routine) const;
912 
913 	uint32_t getWorkgroupSizeX() const;
914 	uint32_t getWorkgroupSizeY() const;
915 	uint32_t getWorkgroupSizeZ() const;
916 
917 	bool containsImageWrite() const { return imageWriteEmitted; }
918 
919 	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
920 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
921 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
922 	WorkgroupMemory workgroupMemory;
923 
924 private:
925 	const bool robustBufferAccess;
926 
927 	Function::ID entryPoint;
928 	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
929 	ExecutionModes executionModes = {};
930 	Capabilities capabilities = {};
931 	spv::AddressingModel addressingModel = spv::AddressingModelLogical;
932 	spv::MemoryModel memoryModel = spv::MemoryModelSimple;
933 	HandleMap<Extension> extensionsByID;
934 	std::unordered_set<uint32_t> extensionsImported;
935 
936 	Analysis analysis = {};
937 	mutable bool imageWriteEmitted = false;
938 
939 	HandleMap<Type> types;
940 	HandleMap<Object> defs;
941 	HandleMap<Function> functions;
942 	std::unordered_map<StringID, String> strings;
943 
944 	std::shared_ptr<SpirvProfiler> profiler;
945 
946 	bool IsProfilingEnabled() const
947 	{
948 		return profiler != nullptr;
949 	}
950 
951 	// DeclareType creates a Type for the given OpTypeX instruction, storing
952 	// it into the types map. It is called from the analysis pass (constructor).
953 	void DeclareType(InsnIterator insn);
954 
955 	void ProcessExecutionMode(InsnIterator it);
956 
957 	uint32_t ComputeTypeSize(InsnIterator insn);
958 	Decorations GetDecorationsForId(TypeOrObjectID id) const;
959 	void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
960 	void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
961 	void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, const Span &indexIds) const;
962 
963 	// Creates an Object for the instruction's result in 'defs'.
964 	void DefineResult(const InsnIterator &insn);
965 
966 	// Processes the OpenCL.Debug.100 instruction for the initial definition
967 	// pass of the SPIR-V.
968 	void DefineOpenCLDebugInfo100(const InsnIterator &insn);
969 
970 	// Returns true if data in the given storage class is word-interleaved
971 	// by each SIMD vector lane, otherwise data is stored linearly.
972 	//
973 	// Each lane addresses a single word, picked by a base pointer and an
974 	// integer offset.
975 	//
976 	// A word is currently 32 bits (single float, int32_t, uint32_t).
977 	// A lane is a single element of a SIMD vector register.
978 	//
979 	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
980 	// ---------------------------------------------------------------------
981 	//
982 	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
983 	//
984 	// Assuming SIMD::Width == 4:
985 	//
986 	//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
987 	//                 ===========+===========+===========+==========
988 	//  LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
989 	// ---------------+-----------+-----------+-----------+----------
990 	//  LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
991 	// ---------------+-----------+-----------+-----------+----------
992 	//  LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
993 	// ---------------+-----------+-----------+-----------+----------
994 	//  LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
995 	//
996 	//
997 	// Linear storage - (IsStorageInterleavedByLane() == false):
998 	// ---------------------------------------------------------
999 	//
1000 	// Address = PtrBase + sizeof(Word) * LaneOffset
1001 	//
1002 	//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
1003 	//                 ===========+===========+===========+==========
1004 	//  LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
1005 	// ---------------+-----------+-----------+-----------+----------
1006 	//  LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
1007 	// ---------------+-----------+-----------+-----------+----------
1008 	//  LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
1009 	// ---------------+-----------+-----------+-----------+----------
1010 	//  LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
1011 	//
1012 	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
1013 	static bool IsExplicitLayout(spv::StorageClass storageClass);
1014 
1015 	static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p);
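// The address formulas above, written out for a single lane (a plain-integer
// sketch; the real code computes these as SIMD::Pointer offsets):
//
//   constexpr uint32_t wordSize = sizeof(uint32_t);  // a word is 32 bits
//
//   // IsStorageInterleavedByLane() == true:
//   uint32_t interleavedByteOffset(uint32_t laneOffset, uint32_t laneIndex)
//   {
//       return wordSize * (SIMD::Width * laneOffset + laneIndex);
//   }
//
//   // IsStorageInterleavedByLane() == false:
//   uint32_t linearByteOffset(uint32_t laneOffset)
//   {
//       return wordSize * laneOffset;
//   }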
1016 
1017 	// Output storage buffers and images should not be affected by helper invocations
1018 	static bool StoresInHelperInvocation(spv::StorageClass storageClass);
1019 
1020 	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;
1021 
1022 	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;
1023 
1024 	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;
1025 
1026 	// MemoryElement describes a scalar element within a structure, and is
1027 	// used by the callback function of VisitMemoryObject().
1028 	struct MemoryElement
1029 	{
1030 		uint32_t index;    // index of the scalar element
1031 		uint32_t offset;   // offset (in bytes) from the base of the object
1032 		const Type &type;  // element type
1033 	};
1034 
1035 	using MemoryVisitor = std::function<void(const MemoryElement &)>;
1036 
1037 	// VisitMemoryObject() walks a type tree in an explicitly laid out
1038 	// storage class, calling the MemoryVisitor for each scalar element
1039 	// within the object.
1040 	void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;
1041 
1042 	// VisitMemoryObjectInner() is internally called by VisitMemoryObject()
1043 	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;
1044 
1045 	Object &CreateConstant(InsnIterator it);
1046 
1047 	void ProcessInterfaceVariable(Object &object);
1048 
1049 	// EmitState holds control-flow state for the emit() pass.
1050 	class EmitState
1051 	{
1052 	public:
1053 		EmitState(SpirvRoutine *routine,
1054 		          Function::ID function,
1055 		          RValue<SIMD::Int> activeLaneMask,
1056 		          RValue<SIMD::Int> storesAndAtomicsMask,
1057 		          const vk::DescriptorSet::Bindings &descriptorSets,
1058 		          unsigned int multiSampleCount)
1059 		    : routine(routine)
1060 		    , function(function)
1061 		    , activeLaneMaskValue(activeLaneMask.value())
1062 		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
1063 		    , descriptorSets(descriptorSets)
1064 		    , multiSampleCount(multiSampleCount)
1065 		{
1066 		}
1067 
1068 		// Returns the mask describing the active lanes as updated by dynamic
1069 		// control flow. Active lanes include helper invocations, used for
1070 		// calculating fragment derivatives, which must not perform memory
1071 		// stores or atomic writes.
1072 		//
1073 		// Use activeStoresAndAtomicsMask() to consider both control flow and
1074 		// lanes which are permitted to perform memory stores and atomic
1075 		// operations.
1076 		RValue<SIMD::Int> activeLaneMask() const
1077 		{
1078 			ASSERT(activeLaneMaskValue != nullptr);
1079 			return RValue<SIMD::Int>(activeLaneMaskValue);
1080 		}
1081 
1082 		// Returns the immutable lane mask that describes which lanes are
1083 		// permitted to perform memory stores and atomic operations.
1084 		// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
1085 		// consider lanes that have been made inactive due to control flow.
1086 		RValue<SIMD::Int> storesAndAtomicsMask() const
1087 		{
1088 			ASSERT(storesAndAtomicsMaskValue != nullptr);
1089 			return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
1090 		}
1091 
1092 		// Returns a lane mask that describes which lanes are permitted to
1093 		// perform memory stores and atomic operations, considering lanes that
1094 		// may have been made inactive due to control flow.
1095 		RValue<SIMD::Int> activeStoresAndAtomicsMask() const
1096 		{
1097 			return activeLaneMask() & storesAndAtomicsMask();
1098 		}
1099 
1100 		// Add a new active lane mask edge from the current block to out.
1101 		// The edge mask value will be (mask AND activeLaneMaskValue).
1102 		// If multiple active lane masks are added for the same edge, then
1103 		// they will be ORed together.
1104 		void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);
1105 
1106 		// Add a new active lane mask for the edge from -> to.
1107 		// If multiple active lane masks are added for the same edge, then
1108 		// they will be ORed together.
1109 		void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);
1110 
1111 		SpirvRoutine *routine = nullptr;                 // The current routine being built.
1112 		Function::ID function;                           // The current function being built.
1113 		Block::ID block;                                 // The current block being built.
1114 		rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
1115 		rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
1116 		Block::Set visited;                              // Blocks already built.
1117 		std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
1118 		std::deque<Block::ID> *pending;
1119 
1120 		const vk::DescriptorSet::Bindings &descriptorSets;
1121 
1122 		unsigned int getMultiSampleCount() const { return multiSampleCount; }
1123 
1124 		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
1125 		{
1126 			auto it = intermediates.emplace(std::piecewise_construct,
1127 			                                std::forward_as_tuple(id),
1128 			                                std::forward_as_tuple(componentCount));
1129 			ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
1130 			return it.first->second;
1131 		}
1132 
1133 		Intermediate const &getIntermediate(Object::ID id) const
1134 		{
1135 			auto it = intermediates.find(id);
1136 			ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
1137 			return it->second;
1138 		}
1139 
1140 		void createPointer(Object::ID id, SIMD::Pointer ptr)
1141 		{
1142 			bool added = pointers.emplace(id, ptr).second;
1143 			ASSERT_MSG(added, "Pointer %d created twice", id.value());
1144 		}
1145 
1146 		SIMD::Pointer const &getPointer(Object::ID id) const
1147 		{
1148 			auto it = pointers.find(id);
1149 			ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
1150 			return it->second;
1151 		}
1152 
1153 	private:
1154 		std::unordered_map<Object::ID, Intermediate> intermediates;
1155 		std::unordered_map<Object::ID, SIMD::Pointer> pointers;
1156 
1157 		const unsigned int multiSampleCount;
1158 	};
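// Mask-usage sketch (illustrative only): any store or atomic emitted during
// the emit pass is predicated on the combined mask, so that helper
// invocations and control-flow-inactive lanes never write memory.
//
//   RValue<SIMD::Int> storeMask = state->activeStoresAndAtomicsMask();
//   // equivalent to: state->activeLaneMask() & state->storesAndAtomicsMask()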
1159 
1160 	// EmitResult is an enumerator of result values from the Emit functions.
1161 	enum class EmitResult
1162 	{
1163 		Continue,    // No termination instructions.
1164 		Terminator,  // Reached a termination instruction.
1165 	};
1166 
1167 	// Generic wrapper over either per-lane intermediate value, or a constant.
1168 	// Constants are transparently widened to per-lane values in operator[].
1169 	// This is appropriate in most cases -- if we're not going to do something
1170 	// significantly different based on whether the value is uniform across lanes.
1171 	class Operand
1172 	{
1173 	public:
1174 		Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId);
1175 		Operand(const Intermediate &value);
1176 
1177 		RValue<SIMD::Float> Float(uint32_t i) const
1178 		{
1179 			if(intermediate)
1180 			{
1181 				return intermediate->Float(i);
1182 			}
1183 
1184 			// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
1185 			// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
1186 			// Thus we must first construct an integer constant, and bitcast to float.
1187 			return As<SIMD::Float>(SIMD::UInt(constant[i]));
1188 		}
1189 
1190 		RValue<SIMD::Int> Int(uint32_t i) const
1191 		{
1192 			if(intermediate)
1193 			{
1194 				return intermediate->Int(i);
1195 			}
1196 
1197 			return SIMD::Int(constant[i]);
1198 		}
1199 
1200 		RValue<SIMD::UInt> UInt(uint32_t i) const
1201 		{
1202 			if(intermediate)
1203 			{
1204 				return intermediate->UInt(i);
1205 			}
1206 
1207 			return SIMD::UInt(constant[i]);
1208 		}
1209 
1210 	private:
1211 		RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)
1212 
1213 		// Delegate constructor
1214 		Operand(const EmitState *state, const Object &object);
1215 
1216 		const uint32_t *constant;
1217 		const Intermediate *intermediate;
1218 
1219 	public:
1220 		const uint32_t componentCount;
1221 	};
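// Usage sketch (illustrative only; the operand word index is hypothetical):
// operands are read the same way whether the SPIR-V object is a constant or a
// per-lane intermediate, since constants are broadcast on access.
//
//   Operand src(this, state, insn.word(3));
//   auto &dst = state->createIntermediate(insn.resultId(), src.componentCount);
//   for(uint32_t i = 0; i < src.componentCount; i++)
//   {
//       dst.move(i, src.Float(i));
//   }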
1222 
1223 	RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)
1224 
1225 	Type const &getType(Type::ID id) const
1226 	{
1227 		auto it = types.find(id);
1228 		ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
1229 		return it->second;
1230 	}
1231 
1232 	Type const &getType(const Object &object) const
1233 	{
1234 		return getType(object.typeId());
1235 	}
1236 
1237 	Object const &getObject(Object::ID id) const
1238 	{
1239 		auto it = defs.find(id);
1240 		ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
1241 		return it->second;
1242 	}
1243 
1244 	Type const &getObjectType(Object::ID id) const
1245 	{
1246 		return getType(getObject(id));
1247 	}
1248 
1249 	Function const &getFunction(Function::ID id) const
1250 	{
1251 		auto it = functions.find(id);
1252 		ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
1253 		return it->second;
1254 	}
1255 
1256 	String const &getString(StringID id) const
1257 	{
1258 		auto it = strings.find(id);
1259 		ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value());
1260 		return it->second;
1261 	}
1262 
1263 	Extension const &getExtension(Extension::ID id) const
1264 	{
1265 		auto it = extensionsByID.find(id);
1266 		ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value());
1267 		return it->second;
1268 	}
1269 
1270 	// Returns a SIMD::Pointer to the underlying data for the given pointer
1271 	// object.
1272 	// Handles objects of the following kinds:
1273 	//  - DescriptorSet
1274 	//  - Pointer
1275 	//  - InterfaceVariable
1276 	// Calling GetPointerToData with objects of any other kind will assert.
1277 	SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const;
1278 
1279 	OutOfBoundsBehavior getOutOfBoundsBehavior(Object::ID pointerId, EmitState const *state) const;
1280 
1281 	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, const Span &indexIds, const EmitState *state) const;
1282 	SIMD::Pointer WalkAccessChain(Object::ID id, const Span &indexIds, const EmitState *state) const;
1283 
1284 	// Returns the *component* offset in the literal for the given access chain.
1285 	uint32_t WalkLiteralAccessChain(Type::ID id, const Span &indexes) const;
1286 
1287 	// Lookup the active lane mask for the edge from -> to.
1288 	// If from is unreachable, then a mask of all zeros is returned.
1289 	// Asserts if from is reachable and the edge does not exist.
1290 	RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;
1291 
1292 	// Updates the current active lane mask.
1293 	void SetActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;
1294 	void SetStoresAndAtomicsMask(RValue<SIMD::Int> mask, EmitState *state) const;
1295 
1296 	// Emit all the unvisited blocks (except for ignore) in DFS order,
1297 	// starting with id.
1298 	void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
1299 	void EmitNonLoop(EmitState *state) const;
1300 	void EmitLoop(EmitState *state) const;
1301 
1302 	void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
1303 	EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;
1304 
1305 	// Emit pass instructions:
1306 	EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
1307 	EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
1308 	EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
1309 	EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
1310 	EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
1311 	EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
1312 	EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
1313 	EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
1314 	EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
1315 	EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
1316 	EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
1317 	EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
1318 	EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
1319 	EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
1320 	EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
1321 	EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
1322 	EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
1323 	EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
1324 	EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
1325 	EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
1326 	EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
1327 	EmitResult EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const;
1328 	EmitResult EmitOpenCLDebugInfo100(InsnIterator insn, EmitState *state) const;
1329 	EmitResult EmitLine(InsnIterator insn, EmitState *state) const;
1330 	EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
1331 	EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
1332 	EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
1333 	EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
1334 	EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
1335 	EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
1336 	EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
1337 	EmitResult EmitTerminateInvocation(InsnIterator insn, EmitState *state) const;
1338 	EmitResult EmitDemoteToHelperInvocation(InsnIterator insn, EmitState *state) const;
1339 	EmitResult EmitIsHelperInvocation(InsnIterator insn, EmitState *state) const;
1340 	EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
1341 	EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
1342 	EmitResult EmitImageSample(const ImageInstruction &instruction, EmitState *state) const;
1343 	EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
1344 	EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
1345 	EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
1346 	EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
1347 	EmitResult EmitImageRead(const ImageInstruction &instruction, EmitState *state) const;
1348 	EmitResult EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const;
1349 	EmitResult EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const;
1350 	EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
1351 	EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
1352 	EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
1353 	EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
1354 	EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
1355 	EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
1356 	EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
1357 	EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
1358 	EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;
1359 
1360 	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
1361 	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const;
1362 
1363 	Pointer<Byte> lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;
1364 	void callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;
1365 
1366 	void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
1367 	static SIMD::Pointer GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state);
1368 	static void WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat);
1369 	uint32_t GetConstScalarInt(Object::ID id) const;
1370 	void EvalSpecConstantOp(InsnIterator insn);
1371 	void EvalSpecConstantUnaryOp(InsnIterator insn);
1372 	void EvalSpecConstantBinaryOp(InsnIterator insn);
1373 
1374 	// Fragment input interpolation functions
1375 	uint32_t GetNumInputComponents(int32_t location) const;
1376 	uint32_t GetPackedInterpolant(int32_t location) const;
1377 	enum InterpolationType
1378 	{
1379 		Centroid,
1380 		AtSample,
1381 		AtOffset,
1382 	};
1383 	SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
1384 	                        uint32_t component, EmitState *state, InterpolationType type) const;
1385 
1386 	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
1387 	// can also store independent operands.
1388 	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const;
1389 
1390 	// LoadPhi loads the phi values from the alloca storage and places the
1391 	// load values into the intermediate with the phi's result id.
1392 	void LoadPhi(InsnIterator insn, EmitState *state) const;
1393 
1394 	// StorePhi updates the phi's alloca storage value using the incoming
1395 	// values from blocks that are both in the OpPhi instruction and in
1396 	// filter.
1397 	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;
1398 
1399 	// Emits a rr::Fence for the given MemorySemanticsMask.
1400 	void Fence(spv::MemorySemanticsMask semantics) const;
1401 
1402 	// Helper for calling rr::Yield with res cast to an rr::Int.
1403 	void Yield(YieldResult res) const;
1404 
1405 	// WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
1406 	// control flow to the given file path.
1407 	void WriteCFGGraphVizDotFile(const char *path) const;
1408 
1409 	// OpcodeName() returns the name of the opcode op.
1410 	static const char *OpcodeName(spv::Op op);
1411 	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);
1412 
1413 	// IsStatement() returns true if the given opcode actually performs
1414 	// work (as opposed to declaring a type, defining a function start / end,
1415 	// etc).
1416 	static bool IsStatement(spv::Op op);
1417 
1418 	// HasTypeAndResult() returns true if the given opcode's instruction
1419 	// has a result type ID and result ID, i.e. defines an Object.
1420 	static bool HasTypeAndResult(spv::Op op);
1421 
1422 	// Helper as we often need to take dot products as part of doing other things.
1423 	static SIMD::Float FDot(unsigned numComponents, Operand const &x, Operand const &y);
1424 	static SIMD::Int SDot(unsigned numComponents, Operand const &x, Operand const &y, Operand const *accum);
1425 	static SIMD::UInt UDot(unsigned numComponents, Operand const &x, Operand const &y, Operand const *accum);
1426 	static SIMD::Int SUDot(unsigned numComponents, Operand const &x, Operand const &y, Operand const *accum);
1427 	static SIMD::Int AddSat(RValue<SIMD::Int> a, RValue<SIMD::Int> b);
1428 	static SIMD::UInt AddSat(RValue<SIMD::UInt> a, RValue<SIMD::UInt> b);
1429 
1430 	// Splits x into a floating-point significand in the range [0.5, 1.0)
1431 	// and an integral exponent of two, such that:
1432 	//   x = significand * 2^exponent
1433 	// Returns the pair <significand, exponent>
1434 	std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
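// For example, a lane holding 12.0f splits into the pair <0.75f, 4>,
// since 0.75 * 2^4 == 12.0.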
1435 
1436 	static ImageSampler *getImageSampler(const vk::Device *device, uint32_t signature, uint32_t samplerId, uint32_t imageViewId);
1437 	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
1438 	static std::shared_ptr<rr::Routine> emitWriteRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
1439 
1440 	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
1441 	static sw::FilterType convertFilterMode(const vk::SamplerState *samplerState, VkImageViewType imageViewType, SamplerMethod samplerMethod);
1442 	static sw::MipmapType convertMipmapMode(const vk::SamplerState *samplerState);
1443 	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::SamplerState *samplerState, VkImageViewType imageViewType);
1444 
1445 	// Returns 0 when invalid.
1446 	static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
1447 
1448 	// Debugger API functions. When ENABLE_VK_DEBUGGER is not defined, these
1449 	// are all no-ops.
1450 
1451 	// dbgInit() initializes the debugger code generation.
1452 	// All other dbgXXX() functions are no-op until this is called.
1453 	void dbgInit(const std::shared_ptr<vk::dbg::Context> &dbgctx);
1454 
1455 	// dbgTerm() terminates the debugger code generation.
1456 	void dbgTerm();
1457 
1458 	// dbgCreateFile() generates a synthetic file containing the disassembly
1459 	// of the SPIR-V shader. This is the file displayed in the debug
1460 	// session.
1461 	void dbgCreateFile();
1462 
1463 	// dbgBeginEmit() sets up the debugging state for the shader.
1464 	void dbgBeginEmit(EmitState *state) const;
1465 
1466 	// dbgEndEmit() tears down the debugging state for the shader.
1467 	void dbgEndEmit(EmitState *state) const;
1468 
1469 	// dbgBeginEmitInstruction() updates the current debugger location for
1470 	// the given instruction.
1471 	void dbgBeginEmitInstruction(InsnIterator insn, EmitState *state) const;
1472 
1473 	// dbgEndEmitInstruction() creates any new debugger variables for the
1474 	// instruction that just completed.
1475 	void dbgEndEmitInstruction(InsnIterator insn, EmitState *state) const;
1476 
1477 	// dbgExposeIntermediate() exposes the intermediate with the given ID to
1478 	// the debugger.
1479 	void dbgExposeIntermediate(Object::ID id, EmitState *state) const;
1480 
1481 	// dbgUpdateActiveLaneMask() updates the active lane masks to the
1482 	// debugger.
1483 	void dbgUpdateActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;
1484 
1485 	// dbgDeclareResult() associates resultId as the result of the given
1486 	// instruction.
1487 	void dbgDeclareResult(const InsnIterator &insn, Object::ID resultId) const;
1488 
1489 	// Impl holds forward declaration structs and pointers to state for the
1490 	// private implementations in the corresponding SpirvShaderXXX.cpp files.
1491 	// This allows access to the private members of the SpirvShader, without
1492 	// littering the header with implementation details.
1493 	struct Impl
1494 	{
1495 		struct Debugger;
1496 		struct Group;
1497 		Debugger *debugger = nullptr;
1498 	};
1499 	Impl impl;
1500 };
1501 
1502 class SpirvRoutine
1503 {
1504 public:
1505 	SpirvRoutine(vk::PipelineLayout const *pipelineLayout);
1506 
1507 	using Variable = Array<SIMD::Float>;
1508 
1509 	// Single-entry 'inline' sampler routine cache.
1510 	struct SamplerCache
1511 	{
1512 		Pointer<Byte> imageDescriptor = nullptr;
1513 		Int samplerId;
1514 
1515 		Pointer<Byte> function;
1516 	};
1517 
1518 	struct InterpolationData
1519 	{
1520 		Pointer<Byte> primitive;
1521 		SIMD::Float x;
1522 		SIMD::Float y;
1523 		SIMD::Float rhw;
1524 		SIMD::Float xCentroid;
1525 		SIMD::Float yCentroid;
1526 		SIMD::Float rhwCentroid;
1527 	};
1528 
1529 	vk::PipelineLayout const *const pipelineLayout;
1530 
1531 	std::unordered_map<SpirvShader::Object::ID, Variable> variables;
1532 	std::unordered_map<uint32_t, SamplerCache> samplerCache;  // Indexed by the instruction position, in words.
1533 	SIMD::Float inputs[MAX_INTERFACE_COMPONENTS];
1534 	SIMD::Float outputs[MAX_INTERFACE_COMPONENTS];
1535 	InterpolationData interpolationData;
1536 
1537 	Pointer<Byte> device;
1538 	Pointer<Byte> workgroupMemory;
1539 	Pointer<Pointer<Byte>> descriptorSets;
1540 	Pointer<Int> descriptorDynamicOffsets;
1541 	Pointer<Byte> pushConstants;
1542 	Pointer<Byte> constants;
1543 	Int discardMask = 0;
1544 
1545 	// Shader invocation state.
1546 	// Not all of these variables are used for every type of shader, and some
1547 	// are only used when debugging. See b/146486064 for more information.
1548 	// Give careful consideration to the runtime performance loss before adding
1549 	// more state here.
1550 	std::array<SIMD::Int, 2> windowSpacePosition;
1551 	Int layer;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
1552 	Int instanceID;
1553 	SIMD::Int vertexIndex;
1554 	std::array<SIMD::Float, 4> fragCoord;
1555 	std::array<SIMD::Float, 4> pointCoord;
1556 	SIMD::Int helperInvocation;
1557 	Int4 numWorkgroups;
1558 	Int4 workgroupID;
1559 	Int4 workgroupSize;
1560 	Int subgroupsPerWorkgroup;
1561 	Int invocationsPerSubgroup;
1562 	Int subgroupIndex;
1563 	SIMD::Int localInvocationIndex;
1564 	std::array<SIMD::Int, 3> localInvocationID;
1565 	std::array<SIMD::Int, 3> globalInvocationID;
1566 
1567 	Pointer<Byte> dbgState;  // Pointer to a debugger state.
1568 
1569 	void createVariable(SpirvShader::Object::ID id, uint32_t componentCount)
1570 	{
1571 		bool added = variables.emplace(id, Variable(componentCount)).second;
1572 		ASSERT_MSG(added, "Variable %d created twice", id.value());
1573 	}
1574 
1575 	Variable &getVariable(SpirvShader::Object::ID id)
1576 	{
1577 		auto it = variables.find(id);
1578 		ASSERT_MSG(it != variables.end(), "Unknown variable %d", id.value());
1579 		return it->second;
1580 	}
1581 
1582 	// setImmutableInputBuiltins() sets all the immutable input builtins,
1583 	// common for all shader types.
1584 	void setImmutableInputBuiltins(SpirvShader const *shader);
1585 
1586 	static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
1587 
1588 	// setInputBuiltin() calls f() with the builtin and value if the shader
1589 	// uses the input builtin, otherwise the call is a no-op.
1590 	// F is a function with the signature:
1591 	// void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
1592 	template<typename F>
1593 	inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f)
1594 	{
1595 		auto it = shader->inputBuiltins.find(id);
1596 		if(it != shader->inputBuiltins.end())
1597 		{
1598 			const auto &builtin = it->second;
1599 			f(builtin, getVariable(builtin.Id));
1600 		}
1601 	}
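// Usage sketch (illustrative only, assuming 'routine' points at the
// SpirvRoutine being set up): a pipeline stage populates a builtin input only
// when the shader actually declares it.
//
//   routine->setInputBuiltin(shader, spv::BuiltInInstanceIndex,
//       [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
//           ASSERT(builtin.SizeInComponents == 1);
//           value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->instanceID));
//       });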
1602 
1603 private:
1604 	// The phis and the profile data are only accessible to SpirvShader
1605 	// as they are only used and exist between calls to
1606 	// SpirvShader::emitProlog() and SpirvShader::emitEpilog().
1607 	friend class SpirvShader;
1608 
1609 	std::unordered_map<SpirvShader::Object::ID, Variable> phis;
1610 	std::unique_ptr<SpirvProfileData> profData;
1611 };
1612 
1613 }  // namespace sw
1614 
1615 #endif  // sw_SpirvShader_hpp
1616