• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_SpirvShader_hpp
16 #define sw_SpirvShader_hpp
17 
18 #include "SamplerCore.hpp"
19 #include "ShaderCore.hpp"
20 #include "SpirvBinary.hpp"
21 #include "SpirvID.hpp"
22 #include "Device/Config.hpp"
23 #include "Device/Sampler.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Math.hpp"
26 #include "System/Types.hpp"
27 #include "Vulkan/VkConfig.hpp"
28 #include "Vulkan/VkDescriptorSet.hpp"
29 
30 #define SPV_ENABLE_UTILITY_CODE
31 #include <spirv/unified1/spirv.hpp>
32 
33 #include <array>
34 #include <atomic>
35 #include <cstdint>
36 #include <cstring>
37 #include <deque>
38 #include <functional>
39 #include <memory>
40 #include <string>
41 #include <type_traits>
42 #include <unordered_map>
43 #include <unordered_set>
44 #include <vector>
45 
46 #undef Yield  // b/127920555
47 
48 namespace vk {
49 
50 class Device;
51 class PipelineLayout;
52 class ImageView;
53 class Sampler;
54 class RenderPass;
55 struct SampledImageDescriptor;
56 struct SamplerState;
57 
58 namespace dbg {
59 class Context;
60 }  // namespace dbg
61 
62 }  // namespace vk
63 
64 namespace sw {
65 
66 // Forward declarations.
67 class SpirvRoutine;
68 
69 // Incrementally constructed complex bundle of rvalues
70 // Effectively a restricted vector, supporting only:
71 // - allocation to a (runtime-known) fixed component count
72 // - in-place construction of elements
73 // - const operator[]
74 class Intermediate
75 {
76 public:
Intermediate(uint32_t componentCount)77 	Intermediate(uint32_t componentCount)
78 	    : componentCount(componentCount)
79 	    , scalar(new rr::Value *[componentCount])
80 	{
81 		for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; }
82 	}
83 
~Intermediate()84 	~Intermediate()
85 	{
86 		delete[] scalar;
87 	}
88 
89 	// TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to
90 	// decide the format used to print the intermediate data.
91 	enum class TypeHint
92 	{
93 		Float,
94 		Int,
95 		UInt
96 	};
97 
move(uint32_t i,RValue<SIMD::Float> && scalar)98 	void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); }
move(uint32_t i,RValue<SIMD::Int> && scalar)99 	void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); }
move(uint32_t i,RValue<SIMD::UInt> && scalar)100 	void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
101 
move(uint32_t i,const RValue<SIMD::Float> & scalar)102 	void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); }
move(uint32_t i,const RValue<SIMD::Int> & scalar)103 	void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); }
move(uint32_t i,const RValue<SIMD::UInt> & scalar)104 	void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); }
105 
106 	// Value retrieval functions.
Float(uint32_t i) const107 	RValue<SIMD::Float> Float(uint32_t i) const
108 	{
109 		ASSERT(i < componentCount);
110 		ASSERT(scalar[i] != nullptr);
111 		return As<SIMD::Float>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Float>(scalar)
112 	}
113 
Int(uint32_t i) const114 	RValue<SIMD::Int> Int(uint32_t i) const
115 	{
116 		ASSERT(i < componentCount);
117 		ASSERT(scalar[i] != nullptr);
118 		return As<SIMD::Int>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::Int>(scalar)
119 	}
120 
UInt(uint32_t i) const121 	RValue<SIMD::UInt> UInt(uint32_t i) const
122 	{
123 		ASSERT(i < componentCount);
124 		ASSERT(scalar[i] != nullptr);
125 		return As<SIMD::UInt>(scalar[i]);  // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
126 	}
127 
128 	// No copy/move construction or assignment
129 	Intermediate(Intermediate const &) = delete;
130 	Intermediate(Intermediate &&) = delete;
131 	Intermediate &operator=(Intermediate const &) = delete;
132 	Intermediate &operator=(Intermediate &&) = delete;
133 
134 	const uint32_t componentCount;
135 
136 private:
emplace(uint32_t i,rr::Value * value,TypeHint type)137 	void emplace(uint32_t i, rr::Value *value, TypeHint type)
138 	{
139 		ASSERT(i < componentCount);
140 		ASSERT(scalar[i] == nullptr);
141 		scalar[i] = value;
142 		RR_PRINT_ONLY(typeHint = type;)
143 	}
144 
145 	rr::Value **const scalar;
146 
147 #ifdef ENABLE_RR_PRINT
148 	friend struct rr::PrintValue::Ty<sw::Intermediate>;
149 	TypeHint typeHint = TypeHint::Float;
150 #endif  // ENABLE_RR_PRINT
151 };
152 
153 class SpirvShader
154 {
155 public:
156 	SpirvBinary insns;
157 
158 	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);
159 
160 	enum class YieldResult
161 	{
162 		ControlBarrier,
163 	};
164 
165 	class Type;
166 	class Object;
167 
168 	// Pseudo-iterator over SPIRV instructions, designed to support range-based-for.
169 	class InsnIterator
170 	{
171 	public:
172 		InsnIterator(InsnIterator const &other) = default;
173 
174 		InsnIterator() = default;
175 
InsnIterator(SpirvBinary::const_iterator iter)176 		explicit InsnIterator(SpirvBinary::const_iterator iter)
177 		    : iter{ iter }
178 		{
179 		}
180 
opcode() const181 		spv::Op opcode() const
182 		{
183 			return static_cast<spv::Op>(*iter & spv::OpCodeMask);
184 		}
185 
wordCount() const186 		uint32_t wordCount() const
187 		{
188 			return *iter >> spv::WordCountShift;
189 		}
190 
word(uint32_t n) const191 		uint32_t word(uint32_t n) const
192 		{
193 			ASSERT(n < wordCount());
194 			return iter[n];
195 		}
196 
wordPointer(uint32_t n) const197 		uint32_t const *wordPointer(uint32_t n) const
198 		{
199 			return &iter[n];
200 		}
201 
string(uint32_t n) const202 		const char *string(uint32_t n) const
203 		{
204 			return reinterpret_cast<const char *>(wordPointer(n));
205 		}
206 
207 		// Returns the number of whole-words that a string literal starting at
208 		// word n consumes. If the end of the intruction is reached before the
209 		// null-terminator is found, then the function DABORT()s and 0 is
210 		// returned.
stringSizeInWords(uint32_t n) const211 		uint32_t stringSizeInWords(uint32_t n) const
212 		{
213 			uint32_t c = wordCount();
214 			for(uint32_t i = n; n < c; i++)
215 			{
216 				auto *u32 = wordPointer(i);
217 				auto *u8 = reinterpret_cast<const uint8_t *>(u32);
218 				// SPIR-V spec 2.2.1. Instructions:
219 				// A string is interpreted as a nul-terminated stream of
220 				// characters. The character set is Unicode in the UTF-8
221 				// encoding scheme. The UTF-8 octets (8-bit bytes) are packed
222 				// four per word, following the little-endian convention (i.e.,
223 				// the first octet is in the lowest-order 8 bits of the word).
224 				// The final word contains the string’s nul-termination
225 				// character (0), and all contents past the end of the string in
226 				// the final word are padded with 0.
227 				if(u8[3] == 0)
228 				{
229 					return 1 + i - n;
230 				}
231 			}
232 			DABORT("SPIR-V string literal was not null-terminated");
233 			return 0;
234 		}
235 
hasResultAndType() const236 		bool hasResultAndType() const
237 		{
238 			bool hasResult = false, hasResultType = false;
239 			spv::HasResultAndType(opcode(), &hasResult, &hasResultType);
240 
241 			return hasResultType;
242 		}
243 
resultTypeId() const244 		SpirvID<Type> resultTypeId() const
245 		{
246 			ASSERT(hasResultAndType());
247 			return word(1);
248 		}
249 
resultId() const250 		SpirvID<Object> resultId() const
251 		{
252 			ASSERT(hasResultAndType());
253 			return word(2);
254 		}
255 
distanceFrom(const InsnIterator & other) const256 		uint32_t distanceFrom(const InsnIterator &other) const
257 		{
258 			return static_cast<uint32_t>(iter - other.iter);
259 		}
260 
operator ==(InsnIterator const & other) const261 		bool operator==(InsnIterator const &other) const
262 		{
263 			return iter == other.iter;
264 		}
265 
operator !=(InsnIterator const & other) const266 		bool operator!=(InsnIterator const &other) const
267 		{
268 			return iter != other.iter;
269 		}
270 
operator *() const271 		InsnIterator operator*() const
272 		{
273 			return *this;
274 		}
275 
operator ++()276 		InsnIterator &operator++()
277 		{
278 			iter += wordCount();
279 			return *this;
280 		}
281 
operator ++(int)282 		InsnIterator const operator++(int)
283 		{
284 			InsnIterator ret{ *this };
285 			iter += wordCount();
286 			return ret;
287 		}
288 
289 	private:
290 		SpirvBinary::const_iterator iter;
291 	};
292 
293 	// Range-based-for interface
begin() const294 	InsnIterator begin() const
295 	{
296 		// Skip over the header words
297 		return InsnIterator{ insns.cbegin() + 5 };
298 	}
299 
end() const300 	InsnIterator end() const
301 	{
302 		return InsnIterator{ insns.cend() };
303 	}
304 
305 	class Type
306 	{
307 	public:
308 		using ID = SpirvID<Type>;
309 
opcode() const310 		spv::Op opcode() const { return definition.opcode(); }
311 
312 		InsnIterator definition;
313 		spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
314 		uint32_t componentCount = 0;
315 		bool isBuiltInBlock = false;
316 
317 		// Inner element type for pointers, arrays, vectors and matrices.
318 		ID element;
319 	};
320 
321 	class Object
322 	{
323 	public:
324 		using ID = SpirvID<Object>;
325 
opcode() const326 		spv::Op opcode() const { return definition.opcode(); }
typeId() const327 		Type::ID typeId() const { return definition.resultTypeId(); }
id() const328 		Object::ID id() const { return definition.resultId(); }
329 
330 		bool isConstantZero() const;
331 
332 		InsnIterator definition;
333 		std::vector<uint32_t> constantValue;
334 
335 		enum class Kind
336 		{
337 			// Invalid default kind.
338 			// If we get left with an object in this state, the module was
339 			// broken.
340 			Unknown,
341 
342 			// TODO: Better document this kind.
343 			// A shader interface variable pointer.
344 			// Pointer with uniform address across all lanes.
345 			// Pointer held by SpirvRoutine::pointers
346 			InterfaceVariable,
347 
348 			// Constant value held by Object::constantValue.
349 			Constant,
350 
351 			// Value held by SpirvRoutine::intermediates.
352 			Intermediate,
353 
354 			// Pointer held by SpirvRoutine::pointers
355 			Pointer,
356 
357 			// A pointer to a vk::DescriptorSet*.
358 			// Pointer held by SpirvRoutine::pointers.
359 			DescriptorSet,
360 		};
361 
362 		Kind kind = Kind::Unknown;
363 	};
364 
365 	// Block is an interval of SPIR-V instructions, starting with the
366 	// opening OpLabel, and ending with a termination instruction.
367 	class Block
368 	{
369 	public:
370 		using ID = SpirvID<Block>;
371 		using Set = std::unordered_set<ID>;
372 
373 		// Edge represents the graph edge between two blocks.
374 		struct Edge
375 		{
376 			ID from;
377 			ID to;
378 
operator ==sw::SpirvShader::Block::Edge379 			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }
380 
381 			struct Hash
382 			{
operator ()sw::SpirvShader::Block::Edge::Hash383 				std::size_t operator()(const Edge &edge) const noexcept
384 				{
385 					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
386 				}
387 			};
388 		};
389 
390 		Block() = default;
391 		Block(const Block &other) = default;
392 		explicit Block(InsnIterator begin, InsnIterator end);
393 
394 		/* range-based-for interface */
begin() const395 		inline InsnIterator begin() const { return begin_; }
end() const396 		inline InsnIterator end() const { return end_; }
397 
398 		enum Kind
399 		{
400 			Simple,                         // OpBranch or other simple terminator.
401 			StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
402 			UnstructuredBranchConditional,  // OpBranchConditional
403 			StructuredSwitch,               // OpSelectionMerge + OpSwitch
404 			UnstructuredSwitch,             // OpSwitch
405 			Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
406 		};
407 
408 		Kind kind = Simple;
409 		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
410 		InsnIterator branchInstruction;  // Branch instruction.
411 		ID mergeBlock;                   // Structured flow merge block.
412 		ID continueTarget;               // Loop continue block.
413 		Set ins;                         // Blocks that branch into this block.
414 		Set outs;                        // Blocks that this block branches to.
415 		bool isLoopMerge = false;
416 
417 	private:
418 		InsnIterator begin_;
419 		InsnIterator end_;
420 	};
421 
422 	class Function
423 	{
424 	public:
425 		using ID = SpirvID<Function>;
426 
427 		// Walks all reachable the blocks starting from id adding them to
428 		// reachable.
429 		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;
430 
431 		// AssignBlockFields() performs the following for all reachable blocks:
432 		// * Assigns Block::ins with the identifiers of all blocks that contain
433 		//   this block in their Block::outs.
434 		// * Sets Block::isLoopMerge to true if the block is the merge of a
435 		//   another loop block.
436 		void AssignBlockFields();
437 
438 		// ForeachBlockDependency calls f with each dependency of the given
439 		// block. A dependency is an incoming block that is not a loop-back
440 		// edge.
441 		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;
442 
443 		// ExistsPath returns true if there's a direct or indirect flow from
444 		// the 'from' block to the 'to' block that does not pass through
445 		// notPassingThrough.
446 		bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;
447 
getBlock(Block::ID id) const448 		Block const &getBlock(Block::ID id) const
449 		{
450 			auto it = blocks.find(id);
451 			ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
452 			return it->second;
453 		}
454 
455 		Block::ID entry;          // function entry point block.
456 		HandleMap<Block> blocks;  // blocks belonging to this function.
457 		Type::ID type;            // type of the function.
458 		Type::ID result;          // return type.
459 	};
460 
461 	using String = std::string;
462 	using StringID = SpirvID<std::string>;
463 
464 	class Extension
465 	{
466 	public:
467 		using ID = SpirvID<Extension>;
468 
469 		enum Name
470 		{
471 			Unknown,
472 			GLSLstd450,
473 			OpenCLDebugInfo100
474 		};
475 
476 		Name name;
477 	};
478 
479 	struct TypeOrObject
480 	{};
481 
482 	// TypeOrObjectID is an identifier that represents a Type or an Object,
483 	// and supports implicit casting to and from Type::ID or Object::ID.
484 	class TypeOrObjectID : public SpirvID<TypeOrObject>
485 	{
486 	public:
487 		using Hash = std::hash<SpirvID<TypeOrObject>>;
488 
TypeOrObjectID(uint32_t id)489 		inline TypeOrObjectID(uint32_t id)
490 		    : SpirvID(id)
491 		{}
TypeOrObjectID(Type::ID id)492 		inline TypeOrObjectID(Type::ID id)
493 		    : SpirvID(id.value())
494 		{}
TypeOrObjectID(Object::ID id)495 		inline TypeOrObjectID(Object::ID id)
496 		    : SpirvID(id.value())
497 		{}
operator Type::ID() const498 		inline operator Type::ID() const { return Type::ID(value()); }
operator Object::ID() const499 		inline operator Object::ID() const { return Object::ID(value()); }
500 	};
501 
502 	// OpImageSample variants
503 	enum Variant : uint32_t
504 	{
505 		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
506 		Dref,
507 		Proj,
508 		ProjDref,
509 		VARIANT_LAST = ProjDref
510 	};
511 
512 	// Compact representation of image instruction state that is passed to the
513 	// trampoline function for retrieving/generating the corresponding sampling routine.
514 	struct ImageInstructionSignature
515 	{
ImageInstructionSignaturesw::SpirvShader::ImageInstructionSignature516 		ImageInstructionSignature(Variant variant, SamplerMethod samplerMethod)
517 		{
518 			this->variant = variant;
519 			this->samplerMethod = samplerMethod;
520 		}
521 
522 		// Unmarshal from raw 32-bit data
ImageInstructionSignaturesw::SpirvShader::ImageInstructionSignature523 		explicit ImageInstructionSignature(uint32_t signature)
524 		    : signature(signature)
525 		{}
526 
getSamplerFunctionsw::SpirvShader::ImageInstructionSignature527 		SamplerFunction getSamplerFunction() const
528 		{
529 			return { samplerMethod, offset != 0, sample != 0 };
530 		}
531 
isDrefsw::SpirvShader::ImageInstructionSignature532 		bool isDref() const
533 		{
534 			return (variant == Dref) || (variant == ProjDref);
535 		}
536 
isProjsw::SpirvShader::ImageInstructionSignature537 		bool isProj() const
538 		{
539 			return (variant == Proj) || (variant == ProjDref);
540 		}
541 
hasLodsw::SpirvShader::ImageInstructionSignature542 		bool hasLod() const
543 		{
544 			return samplerMethod == Lod || samplerMethod == Fetch;  // We always pass a Lod operand for Fetch operations.
545 		}
546 
hasGradsw::SpirvShader::ImageInstructionSignature547 		bool hasGrad() const
548 		{
549 			return samplerMethod == Grad;
550 		}
551 
552 		union
553 		{
554 			struct
555 			{
556 				Variant variant : BITS(VARIANT_LAST);
557 				SamplerMethod samplerMethod : BITS(SAMPLER_METHOD_LAST);
558 				uint32_t gatherComponent : 2;
559 				uint32_t dim : BITS(spv::DimSubpassData);  // spv::Dim
560 				uint32_t arrayed : 1;
561 				uint32_t imageFormat : BITS(spv::ImageFormatR64i);  // spv::ImageFormat
562 
563 				// Parameters are passed to the sampling routine in this order:
564 				uint32_t coordinates : 3;       // 1-4 (does not contain projection component)
565 				/*	uint32_t dref : 1; */       // Indicated by Variant::ProjDref|Dref
566 				/*	uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
567 				uint32_t grad : 2;              // 0-3 components (for each of dx / dy)
568 				uint32_t offset : 2;            // 0-3 components
569 				uint32_t sample : 1;            // 0-1 scalar integer
570 			};
571 
572 			uint32_t signature = 0;
573 		};
574 	};
575 
576 	// This gets stored as a literal in the generated code, so it should be compact.
577 	static_assert(sizeof(ImageInstructionSignature) == sizeof(uint32_t), "ImageInstructionSignature must be 32-bit");
578 
579 	struct ImageInstruction : public ImageInstructionSignature
580 	{
581 		ImageInstruction(InsnIterator insn, const SpirvShader &spirv);
582 
583 		const uint32_t position;
584 
585 		Type::ID resultTypeId = 0;
586 		Object::ID resultId = 0;
587 		Object::ID imageId = 0;
588 		Object::ID samplerId = 0;
589 		Object::ID coordinateId = 0;
590 		Object::ID texelId = 0;
591 		Object::ID drefId = 0;
592 		Object::ID lodOrBiasId = 0;
593 		Object::ID gradDxId = 0;
594 		Object::ID gradDyId = 0;
595 		Object::ID offsetId = 0;
596 		Object::ID sampleId = 0;
597 
598 	private:
599 		static ImageInstructionSignature parseVariantAndMethod(InsnIterator insn);
600 		static uint32_t getImageOperandsIndex(InsnIterator insn);
601 		static uint32_t getImageOperandsMask(InsnIterator insn);
602 	};
603 
604 	// This method is for retrieving an ID that uniquely identifies the
605 	// shader entry point represented by this object.
getIdentifier() const606 	uint64_t getIdentifier() const
607 	{
608 		return ((uint64_t)entryPoint.value() << 32) | insns.getIdentifier();
609 	}
610 
611 	SpirvShader(VkShaderStageFlagBits stage,
612 	            const char *entryPointName,
613 	            SpirvBinary const &insns,
614 	            const vk::RenderPass *renderPass,
615 	            uint32_t subpassIndex,
616 	            bool robustBufferAccess,
617 	            const std::shared_ptr<vk::dbg::Context> &dbgctx);
618 
619 	~SpirvShader();
620 
621 	struct ExecutionModes
622 	{
623 		bool EarlyFragmentTests : 1;
624 		bool DepthReplacing : 1;
625 		bool DepthGreater : 1;
626 		bool DepthLess : 1;
627 		bool DepthUnchanged : 1;
628 
629 		// Compute workgroup dimensions
630 		int WorkgroupSizeX = 1;
631 		int WorkgroupSizeY = 1;
632 		int WorkgroupSizeZ = 1;
633 	};
634 
getExecutionModes() const635 	const ExecutionModes &getExecutionModes() const
636 	{
637 		return executionModes;
638 	}
639 
640 	struct Analysis
641 	{
642 		bool ContainsKill : 1;
643 		bool ContainsControlBarriers : 1;
644 		bool NeedsCentroid : 1;
645 		bool ContainsSampleQualifier : 1;
646 	};
647 
getAnalysis() const648 	const Analysis &getAnalysis() const
649 	{
650 		return analysis;
651 	}
652 
653 	struct Capabilities
654 	{
655 		bool Matrix : 1;
656 		bool Shader : 1;
657 		bool StorageImageMultisample : 1;
658 		bool ClipDistance : 1;
659 		bool CullDistance : 1;
660 		bool ImageCubeArray : 1;
661 		bool SampleRateShading : 1;
662 		bool InputAttachment : 1;
663 		bool Sampled1D : 1;
664 		bool Image1D : 1;
665 		bool SampledBuffer : 1;
666 		bool SampledCubeArray : 1;
667 		bool ImageBuffer : 1;
668 		bool ImageMSArray : 1;
669 		bool StorageImageExtendedFormats : 1;
670 		bool ImageQuery : 1;
671 		bool DerivativeControl : 1;
672 		bool InterpolationFunction : 1;
673 		bool StorageImageWriteWithoutFormat : 1;
674 		bool GroupNonUniform : 1;
675 		bool GroupNonUniformVote : 1;
676 		bool GroupNonUniformBallot : 1;
677 		bool GroupNonUniformShuffle : 1;
678 		bool GroupNonUniformShuffleRelative : 1;
679 		bool GroupNonUniformArithmetic : 1;
680 		bool DeviceGroup : 1;
681 		bool MultiView : 1;
682 		bool StencilExportEXT : 1;
683 	};
684 
getUsedCapabilities() const685 	const Capabilities &getUsedCapabilities() const
686 	{
687 		return capabilities;
688 	}
689 
690 	// getNumOutputClipDistances() returns the number of ClipDistances
691 	// outputted by this shader.
getNumOutputClipDistances() const692 	unsigned int getNumOutputClipDistances() const
693 	{
694 		if(getUsedCapabilities().ClipDistance)
695 		{
696 			auto it = outputBuiltins.find(spv::BuiltInClipDistance);
697 			if(it != outputBuiltins.end())
698 			{
699 				return it->second.SizeInComponents;
700 			}
701 		}
702 		return 0;
703 	}
704 
705 	// getNumOutputCullDistances() returns the number of CullDistances
706 	// outputted by this shader.
getNumOutputCullDistances() const707 	unsigned int getNumOutputCullDistances() const
708 	{
709 		if(getUsedCapabilities().CullDistance)
710 		{
711 			auto it = outputBuiltins.find(spv::BuiltInCullDistance);
712 			if(it != outputBuiltins.end())
713 			{
714 				return it->second.SizeInComponents;
715 			}
716 		}
717 		return 0;
718 	}
719 
720 	enum AttribType : unsigned char
721 	{
722 		ATTRIBTYPE_FLOAT,
723 		ATTRIBTYPE_INT,
724 		ATTRIBTYPE_UINT,
725 		ATTRIBTYPE_UNUSED,
726 
727 		ATTRIBTYPE_LAST = ATTRIBTYPE_UINT
728 	};
729 
hasBuiltinInput(spv::BuiltIn b) const730 	bool hasBuiltinInput(spv::BuiltIn b) const
731 	{
732 		return inputBuiltins.find(b) != inputBuiltins.end();
733 	}
734 
hasBuiltinOutput(spv::BuiltIn b) const735 	bool hasBuiltinOutput(spv::BuiltIn b) const
736 	{
737 		return outputBuiltins.find(b) != outputBuiltins.end();
738 	}
739 
740 	struct Decorations
741 	{
742 		int32_t Location = -1;
743 		int32_t Component = 0;
744 		spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1);
745 		int32_t Offset = -1;
746 		int32_t ArrayStride = -1;
747 		int32_t MatrixStride = 1;
748 
749 		bool HasLocation : 1;
750 		bool HasComponent : 1;
751 		bool HasBuiltIn : 1;
752 		bool HasOffset : 1;
753 		bool HasArrayStride : 1;
754 		bool HasMatrixStride : 1;
755 		bool HasRowMajor : 1;  // whether RowMajor bit is valid.
756 
757 		bool Flat : 1;
758 		bool Centroid : 1;
759 		bool NoPerspective : 1;
760 		bool Block : 1;
761 		bool BufferBlock : 1;
762 		bool RelaxedPrecision : 1;
763 		bool RowMajor : 1;      // RowMajor if true; ColMajor if false
764 		bool InsideMatrix : 1;  // pseudo-decoration for whether we're inside a matrix.
765 
Decorationssw::SpirvShader::Decorations766 		Decorations()
767 		    : Location{ -1 }
768 		    , Component{ 0 }
769 		    , BuiltIn{ static_cast<spv::BuiltIn>(-1) }
770 		    , Offset{ -1 }
771 		    , ArrayStride{ -1 }
772 		    , MatrixStride{ -1 }
773 		    , HasLocation{ false }
774 		    , HasComponent{ false }
775 		    , HasBuiltIn{ false }
776 		    , HasOffset{ false }
777 		    , HasArrayStride{ false }
778 		    , HasMatrixStride{ false }
779 		    , HasRowMajor{ false }
780 		    , Flat{ false }
781 		    , Centroid{ false }
782 		    , NoPerspective{ false }
783 		    , Block{ false }
784 		    , BufferBlock{ false }
785 		    , RelaxedPrecision{ false }
786 		    , RowMajor{ false }
787 		    , InsideMatrix{ false }
788 		{
789 		}
790 
791 		Decorations(Decorations const &) = default;
792 
793 		void Apply(Decorations const &src);
794 
795 		void Apply(spv::Decoration decoration, uint32_t arg);
796 	};
797 
798 	std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations;
799 	std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations;
800 
801 	struct DescriptorDecorations
802 	{
803 		int32_t DescriptorSet = -1;
804 		int32_t Binding = -1;
805 		int32_t InputAttachmentIndex = -1;
806 
807 		void Apply(DescriptorDecorations const &src);
808 	};
809 
810 	std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations;
811 	std::vector<vk::Format> inputAttachmentFormats;
812 
813 	struct InterfaceComponent
814 	{
815 		AttribType Type;
816 
817 		union
818 		{
819 			struct
820 			{
821 				bool Flat : 1;
822 				bool Centroid : 1;
823 				bool NoPerspective : 1;
824 			};
825 
826 			uint8_t DecorationBits;
827 		};
828 
InterfaceComponentsw::SpirvShader::InterfaceComponent829 		InterfaceComponent()
830 		    : Type{ ATTRIBTYPE_UNUSED }
831 		    , DecorationBits{ 0 }
832 		{
833 		}
834 	};
835 
836 	struct BuiltinMapping
837 	{
838 		Object::ID Id;
839 		uint32_t FirstComponent;
840 		uint32_t SizeInComponents;
841 	};
842 
843 	struct WorkgroupMemory
844 	{
845 		// allocates a new variable of size bytes with the given identifier.
allocatesw::SpirvShader::WorkgroupMemory846 		inline void allocate(Object::ID id, uint32_t size)
847 		{
848 			uint32_t offset = totalSize;
849 			auto it = offsets.emplace(id, offset);
850 			ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
851 			totalSize += size;
852 		}
853 		// returns the byte offset of the variable with the given identifier.
offsetOfsw::SpirvShader::WorkgroupMemory854 		inline uint32_t offsetOf(Object::ID id) const
855 		{
856 			auto it = offsets.find(id);
857 			ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
858 			return it->second;
859 		}
860 		// returns the total allocated size in bytes.
sizesw::SpirvShader::WorkgroupMemory861 		inline uint32_t size() const { return totalSize; }
862 
863 	private:
864 		uint32_t totalSize = 0;                            // in bytes
865 		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
866 	};
867 
868 	std::vector<InterfaceComponent> inputs;
869 	std::vector<InterfaceComponent> outputs;
870 
871 	void emitProlog(SpirvRoutine *routine) const;
872 	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
873 	void emitEpilog(SpirvRoutine *routine) const;
874 	void clearPhis(SpirvRoutine *routine) const;
875 
containsImageWrite() const876 	bool containsImageWrite() const { return imageWriteEmitted; }
877 
878 	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
879 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
880 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
881 	WorkgroupMemory workgroupMemory;
882 
883 private:
884 	const bool robustBufferAccess;
885 
886 	Function::ID entryPoint;
887 	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
888 
889 	ExecutionModes executionModes = {};
890 	Analysis analysis = {};
891 	Capabilities capabilities = {};
892 	HandleMap<Type> types;
893 	HandleMap<Object> defs;
894 	HandleMap<Function> functions;
895 	std::unordered_map<StringID, String> strings;
896 	HandleMap<Extension> extensionsByID;
897 	std::unordered_set<uint32_t> extensionsImported;
898 	mutable bool imageWriteEmitted = false;
899 
900 	// DeclareType creates a Type for the given OpTypeX instruction, storing
901 	// it into the types map. It is called from the analysis pass (constructor).
902 	void DeclareType(InsnIterator insn);
903 
904 	void ProcessExecutionMode(InsnIterator it);
905 
906 	uint32_t ComputeTypeSize(InsnIterator insn);
907 	void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
908 	void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
909 	void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const;
910 
911 	// Creates an Object for the instruction's result in 'defs'.
912 	void DefineResult(const InsnIterator &insn);
913 
914 	// Processes the OpenCL.Debug.100 instruction for the initial definition
915 	// pass of the SPIR-V.
916 	void DefineOpenCLDebugInfo100(const InsnIterator &insn);
917 
918 	// Returns true if data in the given storage class is word-interleaved
919 	// by each SIMD vector lane, otherwise data is stored linearly.
920 	//
921 	// Each lane addresses a single word, picked by a base pointer and an
922 	// integer offset.
923 	//
924 	// A word is currently 32 bits (single float, int32_t, uint32_t).
925 	// A lane is a single element of a SIMD vector register.
926 	//
927 	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
928 	// ---------------------------------------------------------------------
929 	//
930 	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
931 	//
932 	// Assuming SIMD::Width == 4:
933 	//
934 	//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
935 	//                 ===========+===========+===========+==========
936 	//  LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
937 	// ---------------+-----------+-----------+-----------+----------
938 	//  LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
939 	// ---------------+-----------+-----------+-----------+----------
940 	//  LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
941 	// ---------------+-----------+-----------+-----------+----------
942 	//  LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
943 	//
944 	//
945 	// Linear storage - (IsStorageInterleavedByLane() == false):
946 	// ---------------------------------------------------------
947 	//
948 	// Address = PtrBase + sizeof(Word) * LaneOffset
949 	//
950 	//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
951 	//                 ===========+===========+===========+==========
952 	//  LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
953 	// ---------------+-----------+-----------+-----------+----------
954 	//  LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
955 	// ---------------+-----------+-----------+-----------+----------
956 	//  LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
957 	// ---------------+-----------+-----------+-----------+----------
958 	//  LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
959 	//
960 	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
961 	static bool IsExplicitLayout(spv::StorageClass storageClass);
962 
963 	static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p);
964 
965 	// Output storage buffers and images should not be affected by helper invocations
966 	static bool StoresInHelperInvocation(spv::StorageClass storageClass);
967 
	// Callback type accepted by VisitInterface() and VisitInterfaceInner().
	// It is called with a Decorations value and an AttribType.
	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;

	// NOTE(review): presumably walks the interface object `id` and invokes v
	// for each of its components - confirm against the definition.
	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;

	// NOTE(review): helper taking a type and the decorations gathered so far;
	// the meaning of the int result is not visible here - confirm.
	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;
973 
	// MemoryElement describes a scalar element within a structure, and is
	// used by the callback function of VisitMemoryObject().
	struct MemoryElement
	{
		uint32_t index;    // index of the scalar element
		uint32_t offset;   // offset (in bytes) from the base of the object
		const Type &type;  // element type
	};

	// Callback type invoked with each MemoryElement visited.
	using MemoryVisitor = std::function<void(const MemoryElement &)>;

	// VisitMemoryObject() walks a type tree in an explicitly laid out
	// storage class, calling the MemoryVisitor for each scalar element
	// within the object.
	void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;

	// VisitMemoryObjectInner() is internally called by VisitMemoryObject().
	// index is taken by reference, so it can be updated by the callee.
	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;
992 
	// NOTE(review): presumably registers a constant Object for the
	// instruction at `it` and returns a reference to it - confirm.
	Object &CreateConstant(InsnIterator it);

	// NOTE(review): presumably records interface (input/output) information
	// for the given variable object - confirm against the definition.
	void ProcessInterfaceVariable(Object &object);
996 
997 	// EmitState holds control-flow state for the emit() pass.
998 	class EmitState
999 	{
1000 	public:
EmitState(SpirvRoutine * routine,Function::ID function,RValue<SIMD::Int> activeLaneMask,RValue<SIMD::Int> storesAndAtomicsMask,const vk::DescriptorSet::Bindings & descriptorSets,bool robustBufferAccess,unsigned int multiSampleCount,spv::ExecutionModel executionModel)1001 		EmitState(SpirvRoutine *routine,
1002 		          Function::ID function,
1003 		          RValue<SIMD::Int> activeLaneMask,
1004 		          RValue<SIMD::Int> storesAndAtomicsMask,
1005 		          const vk::DescriptorSet::Bindings &descriptorSets,
1006 		          bool robustBufferAccess,
1007 		          unsigned int multiSampleCount,
1008 		          spv::ExecutionModel executionModel)
1009 		    : routine(routine)
1010 		    , function(function)
1011 		    , activeLaneMaskValue(activeLaneMask.value())
1012 		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
1013 		    , descriptorSets(descriptorSets)
1014 		    , robustBufferAccess(robustBufferAccess)
1015 		    , multiSampleCount(multiSampleCount)
1016 		    , executionModel(executionModel)
1017 		{
1018 			ASSERT(executionModel != spv::ExecutionModelMax);  // Must parse OpEntryPoint before emitting.
1019 		}
1020 
1021 		// Returns the mask describing the active lanes as updated by dynamic
1022 		// control flow. Active lanes include helper invocations, used for
1023 		// calculating fragment derivitives, which must not perform memory
1024 		// stores or atomic writes.
1025 		//
1026 		// Use activeStoresAndAtomicsMask() to consider both control flow and
1027 		// lanes which are permitted to perform memory stores and atomic
1028 		// operations
activeLaneMask() const1029 		RValue<SIMD::Int> activeLaneMask() const
1030 		{
1031 			ASSERT(activeLaneMaskValue != nullptr);
1032 			return RValue<SIMD::Int>(activeLaneMaskValue);
1033 		}
1034 
1035 		// Returns the immutable lane mask that describes which lanes are
1036 		// permitted to perform memory stores and atomic operations.
1037 		// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
1038 		// consider lanes that have been made inactive due to control flow.
storesAndAtomicsMask() const1039 		RValue<SIMD::Int> storesAndAtomicsMask() const
1040 		{
1041 			ASSERT(storesAndAtomicsMaskValue != nullptr);
1042 			return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
1043 		}
1044 
1045 		// Returns a lane mask that describes which lanes are permitted to
1046 		// perform memory stores and atomic operations, considering lanes that
1047 		// may have been made inactive due to control flow.
activeStoresAndAtomicsMask() const1048 		RValue<SIMD::Int> activeStoresAndAtomicsMask() const
1049 		{
1050 			return activeLaneMask() & storesAndAtomicsMask();
1051 		}
1052 
1053 		// Add a new active lane mask edge from the current block to out.
1054 		// The edge mask value will be (mask AND activeLaneMaskValue).
1055 		// If multiple active lane masks are added for the same edge, then
1056 		// they will be ORed together.
1057 		void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);
1058 
1059 		// Add a new active lane mask for the edge from -> to.
1060 		// If multiple active lane masks are added for the same edge, then
1061 		// they will be ORed together.
1062 		void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);
1063 
1064 		SpirvRoutine *routine = nullptr;                 // The current routine being built.
1065 		Function::ID function;                           // The current function being built.
1066 		Block::ID block;                                 // The current block being built.
1067 		rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
1068 		rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
1069 		Block::Set visited;                              // Blocks already built.
1070 		std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
1071 		std::deque<Block::ID> *pending;
1072 
1073 		const vk::DescriptorSet::Bindings &descriptorSets;
1074 
1075 		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
1076 
getMultiSampleCount() const1077 		unsigned int getMultiSampleCount() const { return multiSampleCount; }
1078 
createIntermediate(Object::ID id,uint32_t componentCount)1079 		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
1080 		{
1081 			auto it = intermediates.emplace(std::piecewise_construct,
1082 			                                std::forward_as_tuple(id),
1083 			                                std::forward_as_tuple(componentCount));
1084 			ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
1085 			return it.first->second;
1086 		}
1087 
getIntermediate(Object::ID id) const1088 		Intermediate const &getIntermediate(Object::ID id) const
1089 		{
1090 			auto it = intermediates.find(id);
1091 			ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
1092 			return it->second;
1093 		}
1094 
createPointer(Object::ID id,SIMD::Pointer ptr)1095 		void createPointer(Object::ID id, SIMD::Pointer ptr)
1096 		{
1097 			bool added = pointers.emplace(id, ptr).second;
1098 			ASSERT_MSG(added, "Pointer %d created twice", id.value());
1099 		}
1100 
getPointer(Object::ID id) const1101 		SIMD::Pointer const &getPointer(Object::ID id) const
1102 		{
1103 			auto it = pointers.find(id);
1104 			ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
1105 			return it->second;
1106 		}
1107 
1108 	private:
1109 		std::unordered_map<Object::ID, Intermediate> intermediates;
1110 		std::unordered_map<Object::ID, SIMD::Pointer> pointers;
1111 
1112 		const bool robustBufferAccess;  // Emit robustBufferAccess safe code.
1113 		const unsigned int multiSampleCount;
1114 		const spv::ExecutionModel executionModel;
1115 	};
1116 
	// EmitResult is an enumerator of result values from the Emit functions.
	// It is the return type of EmitInstruction() and the per-opcode Emit*()
	// handlers.
	enum class EmitResult
	{
		Continue,    // No termination instructions.
		Terminator,  // Reached a termination instruction.
	};
1123 
1124 	// Generic wrapper over either per-lane intermediate value, or a constant.
1125 	// Constants are transparently widened to per-lane values in operator[].
1126 	// This is appropriate in most cases -- if we're not going to do something
1127 	// significantly different based on whether the value is uniform across lanes.
1128 	class Operand
1129 	{
1130 	public:
1131 		Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId);
1132 		Operand(const Intermediate &value);
1133 
Float(uint32_t i) const1134 		RValue<SIMD::Float> Float(uint32_t i) const
1135 		{
1136 			if(intermediate)
1137 			{
1138 				return intermediate->Float(i);
1139 			}
1140 
1141 			// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
1142 			// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
1143 			// Thus we must first construct an integer constant, and bitcast to float.
1144 			return As<SIMD::Float>(SIMD::UInt(constant[i]));
1145 		}
1146 
Int(uint32_t i) const1147 		RValue<SIMD::Int> Int(uint32_t i) const
1148 		{
1149 			if(intermediate)
1150 			{
1151 				return intermediate->Int(i);
1152 			}
1153 
1154 			return SIMD::Int(constant[i]);
1155 		}
1156 
UInt(uint32_t i) const1157 		RValue<SIMD::UInt> UInt(uint32_t i) const
1158 		{
1159 			if(intermediate)
1160 			{
1161 				return intermediate->UInt(i);
1162 			}
1163 
1164 			return SIMD::UInt(constant[i]);
1165 		}
1166 
1167 	private:
1168 		RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)
1169 
1170 		// Delegate constructor
1171 		Operand(const EmitState *state, const Object &object);
1172 
1173 		const uint32_t *constant;
1174 		const Intermediate *intermediate;
1175 
1176 	public:
1177 		const uint32_t componentCount;
1178 	};
1179 
RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)1180 	RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)
1181 
1182 	Type const &getType(Type::ID id) const
1183 	{
1184 		auto it = types.find(id);
1185 		ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
1186 		return it->second;
1187 	}
1188 
getType(const Object & object) const1189 	Type const &getType(const Object &object) const
1190 	{
1191 		return getType(object.typeId());
1192 	}
1193 
getObject(Object::ID id) const1194 	Object const &getObject(Object::ID id) const
1195 	{
1196 		auto it = defs.find(id);
1197 		ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
1198 		return it->second;
1199 	}
1200 
getObjectType(Object::ID id) const1201 	Type const &getObjectType(Object::ID id) const
1202 	{
1203 		return getType(getObject(id));
1204 	}
1205 
getFunction(Function::ID id) const1206 	Function const &getFunction(Function::ID id) const
1207 	{
1208 		auto it = functions.find(id);
1209 		ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
1210 		return it->second;
1211 	}
1212 
getString(StringID id) const1213 	String const &getString(StringID id) const
1214 	{
1215 		auto it = strings.find(id);
1216 		ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value());
1217 		return it->second;
1218 	}
1219 
getExtension(Extension::ID id) const1220 	Extension const &getExtension(Extension::ID id) const
1221 	{
1222 		auto it = extensionsByID.find(id);
1223 		ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value());
1224 		return it->second;
1225 	}
1226 
	// Returns a SIMD::Pointer to the underlying data for the given pointer
	// object.
	// Handles objects of the following kinds:
	//  - DescriptorSet
	//  - Pointer
	//  - InterfaceVariable
	// Calling GetPointerToData with objects of any other kind will assert.
	SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const;

	// Access chain walkers. Both take the base object id and numIndexes index
	// object IDs (indexIds), and return the resulting SIMD::Pointer.
	// NOTE(review): presumably the first variant is for explicitly laid out
	// storage classes (see IsExplicitLayout()) - confirm.
	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
	SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;

	// Returns the *component* offset in the literal for the given access chain.
	uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
1241 
	// Looks up the active lane mask for the edge from -> to.
	// If from is unreachable, then a mask of all zeros is returned.
	// Asserts if from is reachable and the edge does not exist.
	RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;

	// Updates the current active lane mask.
	void SetActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// Emit all the unvisited blocks (except for ignore) in DFS order,
	// starting with id.
	void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
	// NOTE(review): presumably emit the current block (state->block) as a
	// non-loop or loop block respectively - confirm against the definitions.
	void EmitNonLoop(EmitState *state) const;
	void EmitLoop(EmitState *state) const;

	// EmitInstructions() takes an iterator range [begin, end);
	// EmitInstruction() takes a single instruction and returns an EmitResult.
	void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
	EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;
1258 
	// Emit pass instructions:
	// Each handler receives the instruction (as an InsnIterator, or as an
	// already decoded ImageInstruction for the image handlers) together with
	// the EmitState, and returns an EmitResult.
	EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
	EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
	EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
	EmitResult EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const;
	EmitResult EmitOpenCLDebugInfo100(InsnIterator insn, EmitState *state) const;
	EmitResult EmitLine(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
	EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
	EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
	EmitResult EmitKill(InsnIterator insn, EmitState *state) const;
	EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
	EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageSample(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
	EmitResult EmitImageRead(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const;
	EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
	EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
	EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
	EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
	EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
	EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
	EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;
1311 
	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
	// The sampled values are written to out.
	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const;

	// NOTE(review): presumably lookupSamplerFunction() yields the routine that
	// callSamplerFunction() then invokes, writing into out - confirm.
	Pointer<Byte> lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;
	void callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const;

	// Image helpers. GetImageDimensions() writes its result into dst.
	void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
	static SIMD::Pointer GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state);
	static void WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat);
	// NOTE(review): presumably returns the value of a constant scalar integer
	// object - confirm against the definition.
	uint32_t GetConstScalarInt(Object::ID id) const;
	// Specialization-constant evaluation. These are non-const members, unlike
	// the Emit*() functions above.
	void EvalSpecConstantOp(InsnIterator insn);
	void EvalSpecConstantUnaryOp(InsnIterator insn);
	void EvalSpecConstantBinaryOp(InsnIterator insn);
1325 
	// Fragment input interpolation functions
	uint32_t GetNumInputComponents(int32_t location) const;
	uint32_t GetPackedInterpolant(int32_t location) const;
	// Selects the kind of interpolation performed by Interpolate().
	enum InterpolationType
	{
		Centroid,
		AtSample,
		AtOffset,
	};
	// NOTE(review): presumably paramId identifies the sample index or offset
	// operand for AtSample / AtOffset - confirm against the definition.
	SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
	                        uint32_t component, EmitState *state, InterpolationType type) const;
1337 
	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
	// can also store independent operands.
	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder, EmitState *state) const;

	// LoadPhi loads the phi values from the alloca storage and places the
	// loaded values into the intermediate with the phi's result id.
	void LoadPhi(InsnIterator insn, EmitState *state) const;

	// StorePhi updates the phi's alloca storage value using the incoming
	// values from blocks that are both in the OpPhi instruction and in
	// filter.
	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;

	// Emits a rr::Fence for the given MemorySemanticsMask.
	void Fence(spv::MemorySemanticsMask semantics) const;

	// Helper for calling rr::Yield with res cast to an rr::Int.
	void Yield(YieldResult res) const;

	// WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
	// control flow to the given file path.
	void WriteCFGGraphVizDotFile(const char *path) const;

	// OpcodeName() returns the name of the opcode op.
	static const char *OpcodeName(spv::Op op);
	// MemoryOrder() maps SPIR-V memory semantics to a std::memory_order.
	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

	// IsStatement() returns true if the given opcode actually performs
	// work (as opposed to declaring a type, defining a function start / end,
	// etc).
	static bool IsStatement(spv::Op op);

	// HasTypeAndResult() returns true if the given opcode's instruction
	// has a result type ID and result ID, i.e. defines an Object.
	static bool HasTypeAndResult(spv::Op op);

	// Helper as we often need to take dot products as part of doing other things.
	SIMD::Float Dot(unsigned numComponents, Operand const &x, Operand const &y) const;

	// Splits x into a floating-point significand in the range [0.5, 1.0)
	// and an integral exponent of two, such that:
	//   x = significand * 2^exponent
	// Returns the pair <significand, exponent>
	// (x above refers to the parameter named val.)
	std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
1382 
	// Sampler routine helpers.
	// NOTE(review): presumably getImageSampler() returns a (possibly cached)
	// sampling routine for the signature / sampler / image view combination,
	// built by the emit*Routine() functions - confirm.
	static ImageSampler *getImageSampler(const vk::Device *device, uint32_t signature, uint32_t samplerId, uint32_t imageViewId);
	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
	static std::shared_ptr<rr::Routine> emitWriteRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);

	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
	static sw::FilterType convertFilterMode(const vk::SamplerState *samplerState, VkImageViewType imageViewType, SamplerMethod samplerMethod);
	static sw::MipmapType convertMipmapMode(const vk::SamplerState *samplerState);
	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::SamplerState *samplerState, VkImageViewType imageViewType);

	// Maps a SPIR-V execution model to a Vulkan shader stage flag.
	// Returns 0 when invalid.
	static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
1394 
	// Debugger API functions. When ENABLE_VK_DEBUGGER is not defined, these
	// are all no-ops.

	// dbgInit() initializes the debugger code generation.
	// All other dbgXXX() functions are no-ops until this is called.
	void dbgInit(const std::shared_ptr<vk::dbg::Context> &dbgctx);

	// dbgTerm() terminates the debugger code generation.
	void dbgTerm();

	// dbgCreateFile() generates a synthetic file containing the disassembly
	// of the SPIR-V shader. This is the file displayed in the debug
	// session.
	void dbgCreateFile();

	// dbgBeginEmit() sets up the debugging state for the shader.
	void dbgBeginEmit(EmitState *state) const;

	// dbgEndEmit() tears down the debugging state for the shader.
	void dbgEndEmit(EmitState *state) const;

	// dbgBeginEmitInstruction() updates the current debugger location for
	// the given instruction.
	void dbgBeginEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgEndEmitInstruction() creates any new debugger variables for the
	// instruction that just completed.
	void dbgEndEmitInstruction(InsnIterator insn, EmitState *state) const;

	// dbgExposeIntermediate() exposes the intermediate with the given ID to
	// the debugger.
	void dbgExposeIntermediate(Object::ID id, EmitState *state) const;

	// dbgUpdateActiveLaneMask() reports the updated active lane mask to the
	// debugger.
	void dbgUpdateActiveLaneMask(RValue<SIMD::Int> mask, EmitState *state) const;

	// dbgDeclareResult() associates resultId as the result of the given
	// instruction.
	void dbgDeclareResult(const InsnIterator &insn, Object::ID resultId) const;
1435 
	// Impl holds forward declaration structs and pointers to state for the
	// private implementations in the corresponding SpirvShaderXXX.cpp files.
	// This allows access to the private members of the SpirvShader, without
	// littering the header with implementation details.
	struct Impl
	{
		struct Debugger;  // Declared only; defined elsewhere.
		struct Group;     // Declared only; defined elsewhere.
		Debugger *debugger = nullptr;  // Null by default.
	};
	Impl impl;
1447 };
1448 
1449 class SpirvRoutine
1450 {
1451 public:
1452 	SpirvRoutine(vk::PipelineLayout const *pipelineLayout);
1453 
1454 	using Variable = Array<SIMD::Float>;
1455 
1456 	// Single-entry 'inline' sampler routine cache.
1457 	struct SamplerCache
1458 	{
1459 		Pointer<Byte> imageDescriptor = nullptr;
1460 		Int samplerId;
1461 
1462 		Pointer<Byte> function;
1463 	};
1464 
1465 	struct InterpolationData
1466 	{
1467 		Pointer<Byte> primitive;
1468 		SIMD::Float x;
1469 		SIMD::Float y;
1470 		SIMD::Float rhw;
1471 		SIMD::Float xCentroid;
1472 		SIMD::Float yCentroid;
1473 		SIMD::Float rhwCentroid;
1474 	};
1475 
1476 	vk::PipelineLayout const *const pipelineLayout;
1477 
1478 	std::unordered_map<SpirvShader::Object::ID, Variable> variables;
1479 	std::unordered_map<uint32_t, SamplerCache> samplerCache;  // Indexed by the instruction position, in words.
1480 	Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS };
1481 	Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS };
1482 	InterpolationData interpolationData;
1483 
1484 	Pointer<Byte> device;
1485 	Pointer<Byte> workgroupMemory;
1486 	Pointer<Pointer<Byte>> descriptorSets;
1487 	Pointer<Int> descriptorDynamicOffsets;
1488 	Pointer<Byte> pushConstants;
1489 	Pointer<Byte> constants;
1490 	Int killMask = Int{ 0 };
1491 
1492 	// Shader invocation state.
1493 	// Not all of these variables are used for every type of shader, and some
1494 	// are only used when debugging. See b/146486064 for more information.
1495 	// Give careful consideration to the runtime performance loss before adding
1496 	// more state here.
1497 	std::array<SIMD::Int, 2> windowSpacePosition;
1498 	Int viewID;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
1499 	Int instanceID;
1500 	SIMD::Int vertexIndex;
1501 	std::array<SIMD::Float, 4> fragCoord;
1502 	std::array<SIMD::Float, 4> pointCoord;
1503 	SIMD::Int helperInvocation;
1504 	Int4 numWorkgroups;
1505 	Int4 workgroupID;
1506 	Int4 workgroupSize;
1507 	Int subgroupsPerWorkgroup;
1508 	Int invocationsPerSubgroup;
1509 	Int subgroupIndex;
1510 	SIMD::Int localInvocationIndex;
1511 	std::array<SIMD::Int, 3> localInvocationID;
1512 	std::array<SIMD::Int, 3> globalInvocationID;
1513 
1514 	Pointer<Byte> dbgState;  // Pointer to a debugger state.
1515 
createVariable(SpirvShader::Object::ID id,uint32_t componentCount)1516 	void createVariable(SpirvShader::Object::ID id, uint32_t componentCount)
1517 	{
1518 		bool added = variables.emplace(id, Variable(componentCount)).second;
1519 		ASSERT_MSG(added, "Variable %d created twice", id.value());
1520 	}
1521 
getVariable(SpirvShader::Object::ID id)1522 	Variable &getVariable(SpirvShader::Object::ID id)
1523 	{
1524 		auto it = variables.find(id);
1525 		ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value());
1526 		return it->second;
1527 	}
1528 
1529 	// setImmutableInputBuiltins() sets all the immutable input builtins,
1530 	// common for all shader types.
1531 	void setImmutableInputBuiltins(SpirvShader const *shader);
1532 
1533 	static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
1534 
1535 	// setInputBuiltin() calls f() with the builtin and value if the shader
1536 	// uses the input builtin, otherwise the call is a no-op.
1537 	// F is a function with the signature:
1538 	// void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
1539 	template<typename F>
setInputBuiltin(SpirvShader const * shader,spv::BuiltIn id,F && f)1540 	inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f)
1541 	{
1542 		auto it = shader->inputBuiltins.find(id);
1543 		if(it != shader->inputBuiltins.end())
1544 		{
1545 			const auto &builtin = it->second;
1546 			f(builtin, getVariable(builtin.Id));
1547 		}
1548 	}
1549 
1550 private:
1551 	// The phis are only accessible to SpirvShader as they are only used and
1552 	// exist between calls to SpirvShader::emitProlog() and
1553 	// SpirvShader::emitEpilog().
1554 	friend class SpirvShader;
1555 
1556 	std::unordered_map<SpirvShader::Object::ID, Variable> phis;
1557 };
1558 
1559 }  // namespace sw
1560 
1561 #endif  // sw_SpirvShader_hpp
1562