• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #include "SpirvShader.hpp"
16 
17 #include "System/Types.hpp"
18 
19 #include "Vulkan/VkDescriptorSetLayout.hpp"
20 #include "Vulkan/VkPipelineLayout.hpp"
21 
22 #include <spirv/unified1/spirv.hpp>
23 
24 namespace sw {
25 
// Maps a SPIR-V ImageFormat enumerant to the equivalent Vulkan format.
// Covers the storage-image formats expressible in SPIR-V for Vulkan;
// any enumerant outside this set is reported as unsupported and mapped
// to VK_FORMAT_UNDEFINED (same value returned for ImageFormatUnknown,
// which is legal for e.g. subpass data and descriptor-provided formats).
static vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
{
	switch(format)
	{
	case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
	case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
	case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
	case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
	case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
	case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
	case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
	case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
	// Note the component-order swap: SPIR-V names components low-to-high,
	// Vulkan packed formats name them high-to-low.
	case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
	case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
	case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
	case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
	case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
	case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
	case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
	case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
	case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
	case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
	case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
	case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
	case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
	case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
	case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
	case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
	case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
	case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
	case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
	case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
	case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
	case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
	case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
	case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
	case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
	case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
	case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
	case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
	case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
	case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
	case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
	case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;

	default:
		UNSUPPORTED("SPIR-V ImageFormat %u", format);
		return VK_FORMAT_UNDEFINED;
	}
}
76 
// Decodes a SPIR-V image instruction (OpImageSample*, OpImageFetch, OpImageRead,
// OpImageWrite, OpImage*Gather, OpImageQueryLod, OpImageTexelPointer) into a
// uniform description: the operand <id>s (image, sampler, coordinate, dref, lod,
// gradients, offset, sample index) plus the signature fields (variant, method,
// dim, arrayed, format, coordinate count) used to select a sampler routine.
SpirvShader::ImageInstruction::ImageInstruction(InsnIterator insn, const SpirvShader &spirv)
    : ImageInstructionSignature(parseVariantAndMethod(insn))
    , position(insn.distanceFrom(spirv.begin()))
{
	if(samplerMethod == Write)
	{
		// OpImageWrite produces no result, so its operands start at word 1.
		imageId = insn.word(1);
		coordinateId = insn.word(2);
		texelId = insn.word(3);
	}
	else
	{
		resultTypeId = insn.resultTypeId();  // word(1)
		resultId = insn.resultId();          // word(2)

		if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer)  // Samplerless
		{
			imageId = insn.word(3);
		}
		else
		{
			// Sampling instructions take a Sampled Image operand: either the result
			// of an OpSampledImage (separate image and sampler) or a variable of
			// combined image/sampler type, in which case one <id> serves as both.
			Object::ID sampledImageId = insn.word(3);
			const Object &sampledImage = spirv.getObject(sampledImageId);

			if(sampledImage.opcode() == spv::OpSampledImage)
			{
				imageId = sampledImage.definition.word(3);
				samplerId = sampledImage.definition.word(4);
			}
			else  // Combined image/sampler
			{
				imageId = sampledImageId;
				samplerId = sampledImageId;
			}
		}

		coordinateId = insn.word(4);
	}

	// `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
	// To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
	const Type &imageObjectType = spirv.getObjectType(imageId);
	const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
	                                     ? spirv.getType(imageObjectType.definition.word(2))
	                                     : imageObjectType;
	const Type &imageType = ((imageReferenceType.opcode() == spv::OpTypePointer)
	                             ? spirv.getType(imageReferenceType.element)
	                             : imageReferenceType);

	ASSERT(imageType.opcode() == spv::OpTypeImage);
	// OpTypeImage operand layout: word(3) = Dim, word(5) = Arrayed, word(8) = Image Format.
	dim = imageType.definition.word(3);
	arrayed = imageType.definition.word(5);
	imageFormat = imageType.definition.word(8);

	const Object &coordinateObject = spirv.getObject(coordinateId);
	const Type &coordinateType = spirv.getType(coordinateObject);
	// Projective variants carry an extra q component which is not a coordinate.
	coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);

	if(samplerMethod == TexelPointer)
	{
		// OpImageTexelPointer carries the sample index as a fixed operand, not an image operand.
		sampleId = insn.word(5);
		sample = !spirv.getObject(sampleId).isConstantZero();
	}

	if(isDref())
	{
		drefId = insn.word(5);
	}

	if(samplerMethod == Gather)
	{
		// OpImageGather's component operand must be a constant; Dref gathers always gather component 0.
		gatherComponent = !isDref() ? spirv.getObject(insn.word(5)).constantValue[0] : 0;
	}

	uint32_t operandsIndex = getImageOperandsIndex(insn);
	uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0;  // The mask which indicates which operands are provided.

	operandsIndex += 1;  // Advance to the first actual operand <id> location.

	// Operand <id>s follow the mask in the order of ascending mask bit values,
	// so each recognized operand is consumed and its bit cleared in turn.
	if(imageOperands & spv::ImageOperandsBiasMask)
	{
		ASSERT(samplerMethod == Bias);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsBiasMask;
	}

	if(imageOperands & spv::ImageOperandsLodMask)
	{
		ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsLodMask;
	}

	if(imageOperands & spv::ImageOperandsGradMask)
	{
		ASSERT(samplerMethod == Grad);
		gradDxId = insn.word(operandsIndex + 0);
		gradDyId = insn.word(operandsIndex + 1);
		operandsIndex += 2;
		imageOperands &= ~spv::ImageOperandsGradMask;

		grad = spirv.getObjectType(gradDxId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsConstOffsetMask)
	{
		offsetId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsConstOffsetMask;

		offset = spirv.getObjectType(offsetId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsSampleMask)
	{
		ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
		sampleId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsSampleMask;

		sample = !spirv.getObject(sampleId).isConstantZero();
	}

	// TODO(b/174475384)
	if(imageOperands & spv::ImageOperandsZeroExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsZeroExtendMask;
	}
	else if(imageOperands & spv::ImageOperandsSignExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsSignExtendMask;
	}

	[[maybe_unused]] spv::Scope scope = spv::ScopeCrossDevice;  // "Whilst the CrossDevice scope is defined in SPIR-V, it is disallowed in Vulkan."

	// The availability/visibility scope operands are consumed but currently
	// unused; memory coherence is handled elsewhere.
	if(imageOperands & spv::ImageOperandsMakeTexelAvailableMask)
	{
		scope = static_cast<spv::Scope>(insn.word(operandsIndex));
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsMakeTexelAvailableMask;
	}

	if(imageOperands & spv::ImageOperandsMakeTexelVisibleMask)
	{
		scope = static_cast<spv::Scope>(insn.word(operandsIndex));
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsMakeTexelVisibleMask;
	}

	if(imageOperands & spv::ImageOperandsNonPrivateTexelMask)
	{
		imageOperands &= ~spv::ImageOperandsNonPrivateTexelMask;
	}

	if(imageOperands & spv::ImageOperandsVolatileTexelMask)
	{
		UNIMPLEMENTED("b/176819536");
		imageOperands &= ~spv::ImageOperandsVolatileTexelMask;
	}

	// There should be no remaining image operands.
	if(imageOperands != 0)
	{
		UNSUPPORTED("Image operands 0x%08X", imageOperands);
	}
}
247 
parseVariantAndMethod(InsnIterator insn)248 SpirvShader::ImageInstructionSignature SpirvShader::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
249 {
250 	uint32_t imageOperands = getImageOperandsMask(insn);
251 	bool bias = imageOperands & spv::ImageOperandsBiasMask;
252 	bool grad = imageOperands & spv::ImageOperandsGradMask;
253 
254 	switch(insn.opcode())
255 	{
256 	case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
257 	case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
258 	case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
259 	case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
260 	case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
261 	case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
262 	case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
263 	case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
264 	case spv::OpImageGather: return { None, Gather };
265 	case spv::OpImageDrefGather: return { Dref, Gather };
266 	case spv::OpImageFetch: return { None, Fetch };
267 	case spv::OpImageQueryLod: return { None, Query };
268 	case spv::OpImageRead: return { None, Read };
269 	case spv::OpImageWrite: return { None, Write };
270 	case spv::OpImageTexelPointer: return { None, TexelPointer };
271 
272 	default:
273 		ASSERT(false);
274 		return { None, Implicit };
275 	}
276 }
277 
278 // Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
getImageOperandsIndex(InsnIterator insn)279 uint32_t SpirvShader::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
280 {
281 	switch(insn.opcode())
282 	{
283 	case spv::OpImageSampleImplicitLod:
284 	case spv::OpImageSampleProjImplicitLod:
285 		return insn.wordCount() > 5 ? 5 : 0;  // Optional
286 	case spv::OpImageSampleExplicitLod:
287 	case spv::OpImageSampleProjExplicitLod:
288 		return 5;  // "Either Lod or Grad image operands must be present."
289 	case spv::OpImageSampleDrefImplicitLod:
290 	case spv::OpImageSampleProjDrefImplicitLod:
291 		return insn.wordCount() > 6 ? 6 : 0;  // Optional
292 	case spv::OpImageSampleDrefExplicitLod:
293 	case spv::OpImageSampleProjDrefExplicitLod:
294 		return 6;  // "Either Lod or Grad image operands must be present."
295 	case spv::OpImageGather:
296 	case spv::OpImageDrefGather:
297 		return insn.wordCount() > 6 ? 6 : 0;  // Optional
298 	case spv::OpImageFetch:
299 		return insn.wordCount() > 5 ? 5 : 0;  // Optional
300 	case spv::OpImageQueryLod:
301 		ASSERT(insn.wordCount() == 5);
302 		return 0;  // No image operands.
303 	case spv::OpImageRead:
304 		return insn.wordCount() > 5 ? 5 : 0;  // Optional
305 	case spv::OpImageWrite:
306 		return insn.wordCount() > 4 ? 4 : 0;  // Optional
307 	case spv::OpImageTexelPointer:
308 		ASSERT(insn.wordCount() == 6);
309 		return 0;  // No image operands.
310 
311 	default:
312 		ASSERT(false);
313 		return 0;
314 	}
315 }
316 
getImageOperandsMask(InsnIterator insn)317 uint32_t SpirvShader::ImageInstruction::getImageOperandsMask(InsnIterator insn)
318 {
319 	uint32_t operandsIndex = getImageOperandsIndex(insn);
320 	return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
321 }
322 
// Emits code for a sampling instruction: performs the (conditional) sample
// into a temporary 4-component output array, then moves the components into
// the instruction's result intermediate.
SpirvShader::EmitResult SpirvShader::EmitImageSample(const ImageInstruction &instruction, EmitState *state) const
{
	auto &resultType = getType(instruction.resultTypeId);
	auto &result = state->createIntermediate(instruction.resultId, resultType.componentCount);
	Array<SIMD::Float> out(4);  // Sampler routines always produce 4 components.

	// TODO(b/153380916): When we're in a code path that is always executed,
	// i.e. post-dominators of the entry block, we don't have to dynamically
	// check whether any lanes are active, and can elide the jump.
	If(AnyTrue(state->activeLaneMask()))
	{
		EmitImageSampleUnconditional(out, instruction, state);
	}

	// The result type may have fewer than 4 components (e.g. OpImageQueryLod).
	for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }

	return EmitResult::Continue;
}
341 
EmitImageSampleUnconditional(Array<SIMD::Float> & out,const ImageInstruction & instruction,EmitState * state) const342 void SpirvShader::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const
343 {
344 	Pointer<Byte> imageDescriptor = state->getPointer(instruction.imageId).base;  // vk::SampledImageDescriptor*
345 
346 	Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, instruction, state);
347 
348 	callSamplerFunction(samplerFunction, out, imageDescriptor, instruction, state);
349 }
350 
// Returns a pointer to the specialized sampler routine for this image/sampler
// combination. A per-instruction cache keyed on (imageDescriptor, samplerId)
// avoids re-calling getImageSampler when the bound descriptors are unchanged.
Pointer<Byte> SpirvShader::lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
	Int samplerId = 0;  // 0 denotes a samplerless instruction.

	if(instruction.samplerId != 0)
	{
		Pointer<Byte> samplerDescriptor = state->getPointer(instruction.samplerId).base;  // vk::SampledImageDescriptor*

		samplerId = *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId));  // vk::Sampler::id
	}

	// The cache slot is addressed by the instruction's position in the module,
	// so each image instruction has its own single-entry cache.
	auto &cache = state->routine->samplerCache.at(instruction.position);
	Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId);  // TODO(b/205566405): Skip sampler ID check for samplerless instructions.

	If(!cacheHit)
	{
		rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
		// Call out at shader-execution time to obtain (and possibly JIT) the
		// sampler routine for this signature/sampler/view combination.
		cache.function = Call(getImageSampler, state->routine->device, instruction.signature, samplerId, imageViewId);
		cache.imageDescriptor = imageDescriptor;
		cache.samplerId = samplerId;
	}

	return cache.function;
}
375 
callSamplerFunction(Pointer<Byte> samplerFunction,Array<SIMD::Float> & out,Pointer<Byte> imageDescriptor,const ImageInstruction & instruction,EmitState * state) const376 void SpirvShader::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
377 {
378 	Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.
379 
380 	auto coordinate = Operand(this, state, instruction.coordinateId);
381 
382 	uint32_t i = 0;
383 	for(; i < instruction.coordinates; i++)
384 	{
385 		if(instruction.isProj())
386 		{
387 			in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
388 		}
389 		else
390 		{
391 			in[i] = coordinate.Float(i);
392 		}
393 	}
394 
395 	if(instruction.isDref())
396 	{
397 		auto drefValue = Operand(this, state, instruction.drefId);
398 
399 		if(instruction.isProj())
400 		{
401 			in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
402 		}
403 		else
404 		{
405 			in[i] = drefValue.Float(0);
406 		}
407 
408 		i++;
409 	}
410 
411 	if(instruction.lodOrBiasId != 0)
412 	{
413 		auto lodValue = Operand(this, state, instruction.lodOrBiasId);
414 		in[i] = lodValue.Float(0);
415 		i++;
416 	}
417 	else if(instruction.gradDxId != 0)
418 	{
419 		auto dxValue = Operand(this, state, instruction.gradDxId);
420 		auto dyValue = Operand(this, state, instruction.gradDyId);
421 		ASSERT(dxValue.componentCount == dxValue.componentCount);
422 
423 		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
424 		{
425 			in[i] = dxValue.Float(j);
426 		}
427 
428 		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
429 		{
430 			in[i] = dyValue.Float(j);
431 		}
432 	}
433 	else if(instruction.samplerMethod == Fetch)
434 	{
435 		// The instruction didn't provide a lod operand, but the sampler's Fetch
436 		// function requires one to be present. If no lod is supplied, the default
437 		// is zero.
438 		in[i] = As<SIMD::Float>(SIMD::Int(0));
439 		i++;
440 	}
441 
442 	if(instruction.offsetId != 0)
443 	{
444 		auto offsetValue = Operand(this, state, instruction.offsetId);
445 
446 		for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
447 		{
448 			in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transfered as float.
449 		}
450 	}
451 
452 	if(instruction.sample)
453 	{
454 		auto sampleValue = Operand(this, state, instruction.sampleId);
455 		in[i] = As<SIMD::Float>(sampleValue.Int(0));
456 	}
457 
458 	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);  // sw::Texture*
459 
460 	Call<ImageSampler>(samplerFunction, texture, &in, &out, state->routine->constants);
461 }
462 
EmitImageQuerySizeLod(InsnIterator insn,EmitState * state) const463 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
464 {
465 	auto &resultTy = getType(insn.resultTypeId());
466 	auto imageId = Object::ID(insn.word(3));
467 	auto lodId = Object::ID(insn.word(4));
468 
469 	auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
470 	GetImageDimensions(state, resultTy, imageId, lodId, dst);
471 
472 	return EmitResult::Continue;
473 }
474 
EmitImageQuerySize(InsnIterator insn,EmitState * state) const475 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
476 {
477 	auto &resultTy = getType(insn.resultTypeId());
478 	auto imageId = Object::ID(insn.word(3));
479 	auto lodId = Object::ID(0);
480 
481 	auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
482 	GetImageDimensions(state, resultTy, imageId, lodId, dst);
483 
484 	return EmitResult::Continue;
485 }
486 
// Writes the queried image dimensions into `dst`: width[, height[, depth]],
// followed by the layer count for arrayed images (read from the descriptor's
// depth field). When lodId is non-zero, each dimension is mip-shifted and
// clamped to a minimum of 1.
void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
	auto routine = state->routine;
	auto &image = getObject(imageId);
	auto &imageType = getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	bool isArrayed = imageType.definition.word(5) != 0;  // OpTypeImage word(5) = Arrayed
	// For arrayed images, the last result component is the layer count, not a dimension.
	uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);

	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = state->getPointer(imageId).base;

	Int width;
	Int height;
	Int depth;

	// The width/height/depth fields live at different offsets depending on the
	// descriptor struct backing this binding.
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	if(lodId != 0)
	{
		auto lodVal = Operand(this, state, lodId);
		ASSERT(lodVal.componentCount == 1);
		auto lod = lodVal.Int(0);
		auto one = SIMD::Int(1);

		// Mip level n halves each dimension n times, but never below 1.
		if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
		if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
		if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
	}
	else
	{

		if(dimensions >= 1) dst.move(0, SIMD::Int(width));
		if(dimensions >= 2) dst.move(1, SIMD::Int(height));
		if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
	}

	if(isArrayed)
	{
		// Layer count is stored in the descriptor's depth field and is not mip-adjusted.
		dst.move(dimensions, SIMD::Int(depth));
	}
}
549 
// Emits OpImageQueryLevels: returns the image's mip level count, read from
// the sampled-image descriptor. Only sampled-image descriptor types carry a
// mipLevels field.
SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
{
	auto &resultTy = getType(insn.resultTypeId());
	ASSERT(resultTy.componentCount == 1);
	auto imageId = Object::ID(insn.word(3));

	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
	auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = state->getPointer(imageId).base;
	Int mipLevels = 0;
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	// Broadcast the scalar level count to all lanes of the result.
	auto &dst = state->createIntermediate(insn.resultId(), 1);
	dst.move(0, SIMD::Int(mipLevels));

	return EmitResult::Continue;
}
577 
// Emits OpImageQuerySamples: returns the sample count of a multisampled 2D
// image, read from the bound descriptor (storage or sampled variant).
SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
{
	auto &resultTy = getType(insn.resultTypeId());
	ASSERT(resultTy.componentCount == 1);
	auto imageId = Object::ID(insn.word(3));
	auto imageTy = getObjectType(imageId);
	// The SPIR-V spec restricts OpImageQuerySamples to multisampled 2D images.
	ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
	ASSERT(imageTy.definition.word(3) == spv::Dim2D);
	ASSERT(imageTy.definition.word(6 /* MS */) == 1);

	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
	auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = state->getPointer(imageId).base;
	Int sampleCount = 0;
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	// Broadcast the scalar sample count to all lanes of the result.
	auto &dst = state->createIntermediate(insn.resultId(), 1);
	dst.move(0, SIMD::Int(sampleCount));

	return EmitResult::Continue;
}
612 
GetTexelAddress(ImageInstructionSignature instruction,Pointer<Byte> descriptor,SIMD::Int coordinate[],SIMD::Int sample,vk::Format imageFormat,OutOfBoundsBehavior outOfBoundsBehavior,const EmitState * state)613 SIMD::Pointer SpirvShader::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state)
614 {
615 	bool isArrayed = instruction.arrayed;
616 	spv::Dim dim = static_cast<spv::Dim>(instruction.dim);
617 	int dims = instruction.coordinates - (isArrayed ? 1 : 0);
618 
619 	SIMD::Int u = coordinate[0];
620 	SIMD::Int v = SIMD::Int(0);
621 
622 	if(dims > 1)
623 	{
624 		v = coordinate[1];
625 	}
626 
627 	if(dim == spv::DimSubpassData)
628 	{
629 		u += state->routine->windowSpacePosition[0];
630 		v += state->routine->windowSpacePosition[1];
631 	}
632 
633 	const int texelSize = imageFormat.bytes();
634 	const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
635 	auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
636 	                                                          ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
637 	                                                          : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
638 	auto slicePitch = SIMD::Int(
639 	    *Pointer<Int>(descriptor + (useStencilAspect
640 	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
641 	                                    : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
642 	auto samplePitch = SIMD::Int(
643 	    *Pointer<Int>(descriptor + (useStencilAspect
644 	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
645 	                                    : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
646 
647 	SIMD::Int ptrOffset = u * SIMD::Int(texelSize);
648 
649 	if(dims > 1)
650 	{
651 		ptrOffset += v * rowPitch;
652 	}
653 
654 	SIMD::Int w = 0;
655 	if((dims > 2) || isArrayed)
656 	{
657 		if(dims > 2)
658 		{
659 			w += coordinate[2];
660 		}
661 
662 		if(isArrayed)
663 		{
664 			w += coordinate[dims];
665 		}
666 
667 		ptrOffset += w * slicePitch;
668 	}
669 
670 	if(dim == spv::DimSubpassData)
671 	{
672 		// Multiview input attachment access is to the layer corresponding to the current view
673 		ptrOffset += SIMD::Int(state->routine->layer) * slicePitch;
674 	}
675 
676 	if(instruction.sample)
677 	{
678 		ptrOffset += sample * samplePitch;
679 	}
680 
681 	// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
682 	// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
683 	if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
684 	{
685 		SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
686 		SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(u), width));
687 
688 		if(dims > 1)
689 		{
690 			SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
691 			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(v), height));
692 		}
693 
694 		if((dims > 2) || isArrayed)
695 		{
696 			UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
697 			if(dim == spv::DimCube) { depth *= 6; }
698 			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(w), SIMD::UInt(depth)));
699 		}
700 
701 		if(instruction.sample)
702 		{
703 			SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
704 			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
705 		}
706 
707 		constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
708 		static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");
709 
710 		ptrOffset = (ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
711 	}
712 
713 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
714 	                                                                    ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
715 	                                                                    : OFFSET(vk::StorageImageDescriptor, ptr)));
716 
717 	Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
718 
719 	return SIMD::Pointer(imageBase, imageSizeInBytes, ptrOffset);
720 }
721 
EmitImageRead(const ImageInstruction & instruction,EmitState * state) const722 SpirvShader::EmitResult SpirvShader::EmitImageRead(const ImageInstruction &instruction, EmitState *state) const
723 {
724 	auto &resultType = getObjectType(instruction.resultId);
725 	auto &image = getObject(instruction.imageId);
726 	auto &imageType = getType(image);
727 
728 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
729 	auto dim = static_cast<spv::Dim>(instruction.dim);
730 
731 	auto coordinate = Operand(this, state, instruction.coordinateId);
732 	const DescriptorDecorations &d = descriptorDecorations.at(instruction.imageId);
733 
734 	// For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
735 	// the renderpass data instead. In all other cases, we can use the format in the instruction.
736 	vk::Format imageFormat = (dim == spv::DimSubpassData)
737 	                             ? inputAttachmentFormats[d.InputAttachmentIndex]
738 	                             : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
739 
740 	// Depth+Stencil image attachments select aspect based on the Sampled Type of the
741 	// OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
742 	bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
743 	                         getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
744 
745 	if(useStencilAspect)
746 	{
747 		imageFormat = VK_FORMAT_S8_UINT;
748 	}
749 
750 	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*
751 	auto &dst = state->createIntermediate(instruction.resultId, resultType.componentCount);
752 
753 	// VK_EXT_image_robustness requires replacing out-of-bounds access with zero.
754 	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
755 	auto robustness = OutOfBoundsBehavior::Nullify;
756 
757 	SIMD::Int uvwa[4];
758 	SIMD::Int sample;
759 
760 	for(uint32_t i = 0; i < instruction.coordinates; i++)
761 	{
762 		uvwa[i] = coordinate.Int(i);
763 	}
764 
765 	if(instruction.sample)
766 	{
767 		sample = Operand(this, state, instruction.sampleId).Int(0);
768 	}
769 
770 	auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);
771 
772 	const int texelSize = imageFormat.bytes();
773 
774 	// Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
775 	// TODO(b/160531165): Provide gather abstractions for various element sizes.
776 	SIMD::Int packed[4];
777 	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
778 	{
779 		for(auto i = 0; i < texelSize / 4; i++)
780 		{
781 			packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask());
782 			texelPtr += sizeof(float);
783 		}
784 	}
785 	else if(texelSize == 2)
786 	{
787 		SIMD::Int offsets = texelPtr.offsets();
788 		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);
789 
790 		for(int i = 0; i < SIMD::Width; i++)
791 		{
792 			If(Extract(mask, i) != 0)
793 			{
794 				packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.base + Extract(offsets, i))), i);
795 			}
796 		}
797 	}
798 	else if(texelSize == 1)
799 	{
800 		SIMD::Int offsets = texelPtr.offsets();
801 		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);
802 
803 		for(int i = 0; i < SIMD::Width; i++)
804 		{
805 			If(Extract(mask, i) != 0)
806 			{
807 				packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.base + Extract(offsets, i))), i);
808 			}
809 		}
810 	}
811 	else
812 		UNREACHABLE("texelSize: %d", int(texelSize));
813 
814 	// Format support requirements here come from two sources:
815 	// - Minimum required set of formats for loads from storage images
816 	// - Any format supported as a color or depth/stencil attachment, for input attachments
817 	switch(imageFormat)
818 	{
819 	case VK_FORMAT_R32G32B32A32_SFLOAT:
820 	case VK_FORMAT_R32G32B32A32_SINT:
821 	case VK_FORMAT_R32G32B32A32_UINT:
822 		dst.move(0, packed[0]);
823 		dst.move(1, packed[1]);
824 		dst.move(2, packed[2]);
825 		dst.move(3, packed[3]);
826 		break;
827 	case VK_FORMAT_R32_SINT:
828 	case VK_FORMAT_R32_UINT:
829 		dst.move(0, packed[0]);
830 		// Fill remaining channels with 0,0,1 (of the correct type)
831 		dst.move(1, SIMD::Int(0));
832 		dst.move(2, SIMD::Int(0));
833 		dst.move(3, SIMD::Int(1));
834 		break;
835 	case VK_FORMAT_R32_SFLOAT:
836 	case VK_FORMAT_D32_SFLOAT:
837 	case VK_FORMAT_D32_SFLOAT_S8_UINT:
838 		dst.move(0, packed[0]);
839 		// Fill remaining channels with 0,0,1 (of the correct type)
840 		dst.move(1, SIMD::Float(0.0f));
841 		dst.move(2, SIMD::Float(0.0f));
842 		dst.move(3, SIMD::Float(1.0f));
843 		break;
844 	case VK_FORMAT_D16_UNORM:
845 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
846 		dst.move(1, SIMD::Float(0.0f));
847 		dst.move(2, SIMD::Float(0.0f));
848 		dst.move(3, SIMD::Float(1.0f));
849 		break;
850 	case VK_FORMAT_R16G16B16A16_UNORM:
851 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
852 		dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
853 		dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
854 		dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
855 		break;
856 	case VK_FORMAT_R16G16B16A16_SNORM:
857 		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
858 		dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
859 		dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
860 		dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
861 		break;
862 	case VK_FORMAT_R16G16B16A16_SINT:
863 		dst.move(0, (packed[0] << 16) >> 16);
864 		dst.move(1, packed[0] >> 16);
865 		dst.move(2, (packed[1] << 16) >> 16);
866 		dst.move(3, packed[1] >> 16);
867 		break;
868 	case VK_FORMAT_R16G16B16A16_UINT:
869 		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
870 		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
871 		dst.move(2, packed[1] & SIMD::Int(0xFFFF));
872 		dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
873 		break;
874 	case VK_FORMAT_R16G16B16A16_SFLOAT:
875 		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
876 		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
877 		dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
878 		dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
879 		break;
880 	case VK_FORMAT_R8G8B8A8_SNORM:
881 	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
882 		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
883 		dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
884 		dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
885 		dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
886 		break;
887 	case VK_FORMAT_R8G8B8A8_UNORM:
888 	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
889 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
890 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
891 		dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
892 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
893 		break;
894 	case VK_FORMAT_R8G8B8A8_SRGB:
895 	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
896 		dst.move(0, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
897 		dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
898 		dst.move(2, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
899 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
900 		break;
901 	case VK_FORMAT_B8G8R8A8_UNORM:
902 		dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
903 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
904 		dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
905 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
906 		break;
907 	case VK_FORMAT_B8G8R8A8_SRGB:
908 		dst.move(0, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
909 		dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
910 		dst.move(2, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
911 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
912 		break;
913 	case VK_FORMAT_R8G8B8A8_UINT:
914 	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
915 		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
916 		dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
917 		dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
918 		dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
919 		break;
920 	case VK_FORMAT_R8G8B8A8_SINT:
921 	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
922 		dst.move(0, (packed[0] << 24) >> 24);
923 		dst.move(1, (packed[0] << 16) >> 24);
924 		dst.move(2, (packed[0] << 8) >> 24);
925 		dst.move(3, packed[0] >> 24);
926 		break;
927 	case VK_FORMAT_R8_UNORM:
928 		dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
929 		dst.move(1, SIMD::Float(0.0f));
930 		dst.move(2, SIMD::Float(0.0f));
931 		dst.move(3, SIMD::Float(1.0f));
932 		break;
933 	case VK_FORMAT_R8_SNORM:
934 		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
935 		dst.move(1, SIMD::Float(0.0f));
936 		dst.move(2, SIMD::Float(0.0f));
937 		dst.move(3, SIMD::Float(1.0f));
938 		break;
939 	case VK_FORMAT_R8_UINT:
940 	case VK_FORMAT_S8_UINT:
941 		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
942 		dst.move(1, SIMD::UInt(0));
943 		dst.move(2, SIMD::UInt(0));
944 		dst.move(3, SIMD::UInt(1));
945 		break;
946 	case VK_FORMAT_R8_SINT:
947 		dst.move(0, (packed[0] << 24) >> 24);
948 		dst.move(1, SIMD::Int(0));
949 		dst.move(2, SIMD::Int(0));
950 		dst.move(3, SIMD::Int(1));
951 		break;
952 	case VK_FORMAT_R8G8_UNORM:
953 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
954 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
955 		dst.move(2, SIMD::Float(0.0f));
956 		dst.move(3, SIMD::Float(1.0f));
957 		break;
958 	case VK_FORMAT_R8G8_SNORM:
959 		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
960 		dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
961 		dst.move(2, SIMD::Float(0.0f));
962 		dst.move(3, SIMD::Float(1.0f));
963 		break;
964 	case VK_FORMAT_R8G8_UINT:
965 		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
966 		dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
967 		dst.move(2, SIMD::UInt(0));
968 		dst.move(3, SIMD::UInt(1));
969 		break;
970 	case VK_FORMAT_R8G8_SINT:
971 		dst.move(0, (packed[0] << 24) >> 24);
972 		dst.move(1, (packed[0] << 16) >> 24);
973 		dst.move(2, SIMD::Int(0));
974 		dst.move(3, SIMD::Int(1));
975 		break;
976 	case VK_FORMAT_R16_SFLOAT:
977 		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
978 		dst.move(1, SIMD::Float(0.0f));
979 		dst.move(2, SIMD::Float(0.0f));
980 		dst.move(3, SIMD::Float(1.0f));
981 		break;
982 	case VK_FORMAT_R16_UNORM:
983 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
984 		dst.move(1, SIMD::Float(0.0f));
985 		dst.move(2, SIMD::Float(0.0f));
986 		dst.move(3, SIMD::Float(1.0f));
987 		break;
988 	case VK_FORMAT_R16_SNORM:
989 		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
990 		dst.move(1, SIMD::Float(0.0f));
991 		dst.move(2, SIMD::Float(0.0f));
992 		dst.move(3, SIMD::Float(1.0f));
993 		break;
994 	case VK_FORMAT_R16_UINT:
995 		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
996 		dst.move(1, SIMD::UInt(0));
997 		dst.move(2, SIMD::UInt(0));
998 		dst.move(3, SIMD::UInt(1));
999 		break;
1000 	case VK_FORMAT_R16_SINT:
1001 		dst.move(0, (packed[0] << 16) >> 16);
1002 		dst.move(1, SIMD::Int(0));
1003 		dst.move(2, SIMD::Int(0));
1004 		dst.move(3, SIMD::Int(1));
1005 		break;
1006 	case VK_FORMAT_R16G16_SFLOAT:
1007 		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
1008 		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
1009 		dst.move(2, SIMD::Float(0.0f));
1010 		dst.move(3, SIMD::Float(1.0f));
1011 		break;
1012 	case VK_FORMAT_R16G16_UNORM:
1013 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
1014 		dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
1015 		dst.move(2, SIMD::Float(0.0f));
1016 		dst.move(3, SIMD::Float(1.0f));
1017 		break;
1018 	case VK_FORMAT_R16G16_SNORM:
1019 		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1020 		dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1021 		dst.move(2, SIMD::Float(0.0f));
1022 		dst.move(3, SIMD::Float(1.0f));
1023 		break;
1024 	case VK_FORMAT_R16G16_UINT:
1025 		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
1026 		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
1027 		dst.move(2, SIMD::UInt(0));
1028 		dst.move(3, SIMD::UInt(1));
1029 		break;
1030 	case VK_FORMAT_R16G16_SINT:
1031 		dst.move(0, (packed[0] << 16) >> 16);
1032 		dst.move(1, packed[0] >> 16);
1033 		dst.move(2, SIMD::Int(0));
1034 		dst.move(3, SIMD::Int(1));
1035 		break;
1036 	case VK_FORMAT_R32G32_SINT:
1037 	case VK_FORMAT_R32G32_UINT:
1038 		dst.move(0, packed[0]);
1039 		dst.move(1, packed[1]);
1040 		dst.move(2, SIMD::Int(0));
1041 		dst.move(3, SIMD::Int(1));
1042 		break;
1043 	case VK_FORMAT_R32G32_SFLOAT:
1044 		dst.move(0, packed[0]);
1045 		dst.move(1, packed[1]);
1046 		dst.move(2, SIMD::Float(0.0f));
1047 		dst.move(3, SIMD::Float(1.0f));
1048 		break;
1049 	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1050 		dst.move(0, packed[0] & SIMD::Int(0x3FF));
1051 		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
1052 		dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
1053 		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
1054 		break;
1055 	case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1056 		dst.move(2, packed[0] & SIMD::Int(0x3FF));
1057 		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
1058 		dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
1059 		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
1060 		break;
1061 	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1062 		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1063 		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1064 		dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1065 		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
1066 		break;
1067 	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1068 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1069 		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1070 		dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1071 		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
1072 		break;
1073 	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1074 		dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1075 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1076 		dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1077 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1078 		break;
1079 	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1080 		dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1081 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1082 		dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1083 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1084 		break;
1085 	case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
1086 		dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1087 		dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1088 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1089 		dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1090 		break;
1091 	case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
1092 		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1093 		dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1094 		dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1095 		dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1096 		break;
1097 	case VK_FORMAT_R5G6B5_UNORM_PACK16:
1098 		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1099 		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
1100 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1101 		dst.move(3, SIMD::Float(1.0f));
1102 		break;
1103 	case VK_FORMAT_B5G6R5_UNORM_PACK16:
1104 		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1105 		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
1106 		dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1107 		dst.move(3, SIMD::Float(1.0f));
1108 		break;
1109 	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1110 		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1111 		dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1112 		dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1113 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
1114 		break;
1115 	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1116 		dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1117 		dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1118 		dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1119 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
1120 		break;
1121 	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1122 		dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1123 		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1124 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1125 		dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
1126 		break;
1127 	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1128 		dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
1129 		dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
1130 		dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
1131 		dst.move(3, SIMD::Float(1.0f));
1132 		break;
1133 	default:
1134 		UNSUPPORTED("VkFormat %d", int(imageFormat));
1135 		break;
1136 	}
1137 
1138 	return EmitResult::Continue;
1139 }
1140 
EmitImageWrite(const ImageInstruction & instruction,EmitState * state) const1141 SpirvShader::EmitResult SpirvShader::EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const
1142 {
1143 	imageWriteEmitted = true;
1144 
1145 	auto &image = getObject(instruction.imageId);
1146 	auto &imageType = getType(image);
1147 
1148 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
1149 	ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData);  // "Its Dim operand must not be SubpassData."
1150 
1151 	auto coordinate = Operand(this, state, instruction.coordinateId);
1152 	auto texel = Operand(this, state, instruction.texelId);
1153 
1154 	Array<SIMD::Int> coord(5);  // uvwa & sample
1155 
1156 	uint32_t i = 0;
1157 	for(; i < instruction.coordinates; i++)
1158 	{
1159 		coord[i] = coordinate.Int(i);
1160 	}
1161 
1162 	if(instruction.sample)
1163 	{
1164 		coord[i] = Operand(this, state, instruction.sampleId).Int(0);
1165 	}
1166 
1167 	Array<SIMD::Int> texelAndMask(5);
1168 	texelAndMask[0] = texel.Int(0);
1169 	texelAndMask[1] = texel.Int(1);
1170 	texelAndMask[2] = texel.Int(2);
1171 	texelAndMask[3] = texel.Int(3);
1172 	texelAndMask[4] = state->activeStoresAndAtomicsMask();
1173 
1174 	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*
1175 
1176 	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
1177 
1178 	if(imageFormat == VK_FORMAT_UNDEFINED)  // spv::ImageFormatUnknown
1179 	{
1180 		Pointer<Byte> samplerFunction = lookupSamplerFunction(descriptor, instruction, state);
1181 
1182 		Call<ImageSampler>(samplerFunction, descriptor, &coord, &texelAndMask, state->routine->constants);
1183 	}
1184 	else
1185 	{
1186 		WriteImage(instruction, descriptor, &coord, &texelAndMask, imageFormat);
1187 	}
1188 
1189 	return EmitResult::Continue;
1190 }
1191 
// Packs the texel carried in 'texelAndMask' into the in-memory representation
// of 'imageFormat' and scatters it to the image, for each lane whose bit is
// set in the store mask.
//
// Parameters:
//  - instruction:  signature of the image instruction (dim, coordinate count,
//                  sample flag, ...).
//  - descriptor:   pointer to the bound image's vk::StorageImageDescriptor.
//  - coord:        SIMD coordinates; elements [0..coordinates-1] hold
//                  u/v/w/array layer, followed by the sample index when
//                  instruction.sample is set.
//  - texelAndMask: elements [0..3] are the texel components; element [4] is
//                  the active-lane store mask.
//  - imageFormat:  the statically-known format to write. Formats outside the
//                  switch below hit UNSUPPORTED().
void SpirvShader::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
{
	// Unpack the texel components and the per-lane store mask.
	SIMD::Int texel[4];
	texel[0] = texelAndMask[0];
	texel[1] = texelAndMask[1];
	texel[2] = texelAndMask[2];
	texel[3] = texelAndMask[3];
	SIMD::Int mask = texelAndMask[4];

	// Pack the texel components into the format's memory layout.
	// UNORM components are clamped to [0,1] and scaled to the unsigned maximum
	// of the component width; SNORM components are clamped to [-1,1] and
	// scaled to the signed maximum. Integer components are truncated to the
	// component width. Texels wider than 4 bytes span multiple packed[] elements.
	SIMD::Int packed[4];
	switch(imageFormat)
	{
	case VK_FORMAT_R32G32B32A32_SFLOAT:
	case VK_FORMAT_R32G32B32A32_SINT:
	case VK_FORMAT_R32G32B32A32_UINT:
		packed[0] = texel[0];
		packed[1] = texel[1];
		packed[2] = texel[2];
		packed[3] = texel[3];
		break;
	case VK_FORMAT_R32_SFLOAT:
	case VK_FORMAT_R32_SINT:
	case VK_FORMAT_R32_UINT:
		packed[0] = texel[0];
		break;
	case VK_FORMAT_R8G8B8A8_UNORM:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
		break;
	case VK_FORMAT_B8G8R8A8_UNORM:
		// Red and blue are swapped relative to R8G8B8A8_UNORM.
		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
		break;
	case VK_FORMAT_B8G8R8A8_SRGB:
		// Color channels pass through linear-to-sRGB conversion; alpha stays linear.
		packed[0] = (SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[2])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[1])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[0])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[3])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
		break;
	case VK_FORMAT_R8G8B8A8_SNORM:
	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
		             SIMD::Int(0xFF)) |
		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
		              SIMD::Int(0xFF))
		             << 8) |
		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
		              SIMD::Int(0xFF))
		             << 16) |
		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
		              SIMD::Int(0xFF))
		             << 24);
		break;
	case VK_FORMAT_R8G8B8A8_SINT:
	case VK_FORMAT_R8G8B8A8_UINT:
	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
		packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xff))) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xff)) << 8) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xff)) << 16) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xff)) << 24);
		break;
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		// floatToHalfBits() places the result in the low or high half-word
		// depending on its second argument, so pairs can simply be OR'd.
		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
		packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
		break;
	case VK_FORMAT_R16G16B16A16_SINT:
	case VK_FORMAT_R16G16B16A16_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
		packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
		break;
	case VK_FORMAT_R32G32_SFLOAT:
	case VK_FORMAT_R32G32_SINT:
	case VK_FORMAT_R32G32_UINT:
		packed[0] = texel[0];
		packed[1] = texel[1];
		break;
	case VK_FORMAT_R16G16_SFLOAT:
		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
		break;
	case VK_FORMAT_R16G16_SINT:
	case VK_FORMAT_R16G16_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
		break;
	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
		// Truncates instead of rounding. See b/147900455
		packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
		            ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
		            ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
		break;
	case VK_FORMAT_R16_SFLOAT:
		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
		break;
	case VK_FORMAT_R16G16B16A16_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
		packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
		break;
	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
		break;
	case VK_FORMAT_R16G16_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
		break;
	case VK_FORMAT_R8G8_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
		break;
	case VK_FORMAT_R16_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
		break;
	case VK_FORMAT_R8_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
		break;
	case VK_FORMAT_R16G16B16A16_SNORM:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
		packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
		break;
	case VK_FORMAT_R16G16_SNORM:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
		break;
	case VK_FORMAT_R8G8_SNORM:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
		break;
	case VK_FORMAT_R16_SNORM:
		packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
		break;
	case VK_FORMAT_R8_SNORM:
		packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
		break;
	case VK_FORMAT_R8G8_SINT:
	case VK_FORMAT_R8G8_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
		break;
	case VK_FORMAT_R16_SINT:
	case VK_FORMAT_R16_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
		break;
	case VK_FORMAT_R8_SINT:
	case VK_FORMAT_R8_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
		break;
	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
		packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
		break;
	default:
		UNSUPPORTED("VkFormat %d", int(imageFormat));
		break;
	}

	// "The integer texel coordinates are validated according to the same rules as for texel input coordinate
	//  validation. If the texel fails integer texel coordinate validation, then the write has no effect."
	// - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
	auto robustness = OutOfBoundsBehavior::Nullify;
	// GetTexelAddress() only needs the EmitState* for SubpassData accesses (i.e. input attachments).
	const EmitState *state = nullptr;

	// Split the incoming coordinate array back into coordinates and sample index.
	SIMD::Int uvwa[4];
	SIMD::Int sample;

	uint32_t i = 0;
	for(; i < instruction.coordinates; i++)
	{
		uvwa[i] = As<SIMD::Int>(coord[i]);
	}

	if(instruction.sample)
	{
		// The sample index follows the last coordinate (see the caller's packing).
		sample = As<SIMD::Int>(coord[i]);
	}

	auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

	const int texelSize = imageFormat.bytes();

	// Scatter packed texel data.
	// TODO(b/160531165): Provide scatter abstractions for various element sizes.
	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
	{
		// Texels that are a multiple of 4 bytes are stored one SIMD::Int
		// element at a time, with out-of-bounds lanes nullified.
		for(auto i = 0; i < texelSize / 4; i++)
		{
			texelPtr.Store(packed[i], robustness, mask);
			texelPtr += sizeof(float);
		}
	}
	else if(texelSize == 2)
	{
		// Sub-word texels are stored per lane; the bounds check is folded
		// into the store mask.
		SIMD::Int offsets = texelPtr.offsets();
		mask = mask & texelPtr.isInBounds(2, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				*Pointer<Short>(texelPtr.base + Extract(offsets, i)) = Short(Extract(packed[0], i));
			}
		}
	}
	else if(texelSize == 1)
	{
		SIMD::Int offsets = texelPtr.offsets();
		mask = mask & texelPtr.isInBounds(1, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				*Pointer<Byte>(texelPtr.base + Extract(offsets, i)) = Byte(Extract(packed[0], i));
			}
		}
	}
	else
		UNREACHABLE("texelSize: %d", int(texelSize));
}
1423 
EmitImageTexelPointer(const ImageInstruction & instruction,EmitState * state) const1424 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const
1425 {
1426 	auto coordinate = Operand(this, state, instruction.coordinateId);
1427 
1428 	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*
1429 
1430 	// VK_EXT_image_robustness requires checking for out-of-bounds accesses.
1431 	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
1432 	auto robustness = OutOfBoundsBehavior::Nullify;
1433 	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
1434 
1435 	SIMD::Int uvwa[4];
1436 
1437 	for(uint32_t i = 0; i < instruction.coordinates; i++)
1438 	{
1439 		uvwa[i] = coordinate.Int(i);
1440 	}
1441 
1442 	SIMD::Int sample = Operand(this, state, instruction.sampleId).Int(0);
1443 
1444 	auto ptr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);
1445 
1446 	state->createPointer(instruction.resultId, ptr);
1447 
1448 	return EmitResult::Continue;
1449 }
1450 
EmitSampledImageCombineOrSplit(InsnIterator insn,EmitState * state) const1451 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
1452 {
1453 	// Propagate the image pointer in both cases.
1454 	// Consumers of OpSampledImage will look through to find the sampler pointer.
1455 
1456 	Object::ID resultId = insn.word(2);
1457 	Object::ID imageId = insn.word(3);
1458 
1459 	state->createPointer(resultId, state->getPointer(imageId));
1460 
1461 	return EmitResult::Continue;
1462 }
1463 
1464 }  // namespace sw