// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include "System/Types.hpp"

#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkPipelineLayout.hpp"

#include <spirv/unified1/spirv.hpp>

namespace {

vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
{
	switch(format)
	{
	case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
	case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
	case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
	case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
	case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
	case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
	case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
	case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
	case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
	case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
	case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
	case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
	case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
	case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
	case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
	case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
	case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
	case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
	case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
	case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
	case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
	case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
	case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
	case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
	case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
	case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
	case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
	case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
	case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
	case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
	case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
	case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
	case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
	case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
	case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
	case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
	case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
	case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
	case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
	case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;

	default:
		UNSUPPORTED("SPIR-V ImageFormat %u", format);
		return VK_FORMAT_UNDEFINED;
	}
}

sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
{
	sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
	sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));

	sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));

	return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));  // TODO: IfThenElse()
}
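
// A scalar reference for the vectorized select above (illustrative sketch only):
// the standard sRGB EOTF is piecewise, and both branches are evaluated per SIMD
// lane and then blended with the `linear` mask.
//   float sRGBtoLinearScalar(float c)
//   {
//       return (c < 0.04045f) ? c * (1.0f / 12.92f)
//                             : powf((c + 0.055f) * (1.0f / 1.055f), 2.4f);
//   }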

}  // anonymous namespace

namespace sw {

SpirvShader::ImageInstruction::ImageInstruction(InsnIterator insn, const SpirvShader &spirv)
    : ImageInstructionSignature(parseVariantAndMethod(insn))
    , position(insn.distanceFrom(spirv.begin()))
{
	if(samplerMethod == Write)
	{
		imageId = insn.word(1);
		coordinateId = insn.word(2);
		texelId = insn.word(3);
	}
	else
	{
		resultTypeId = insn.resultTypeId();  // word(1)
		resultId = insn.resultId();          // word(2)

		if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer)  // Samplerless
		{
			imageId = insn.word(3);
		}
		else
		{
			Object::ID sampledImageId = insn.word(3);
			const Object &sampledImage = spirv.getObject(sampledImageId);

			if(sampledImage.opcode() == spv::OpSampledImage)
			{
				imageId = sampledImage.definition.word(3);
				samplerId = sampledImage.definition.word(4);
			}
			else  // Combined image/sampler
			{
				imageId = sampledImageId;
				samplerId = sampledImageId;
			}
		}

		coordinateId = insn.word(4);
	}

	// `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
	// To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
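	// For example, a typical combined sampling sequence in SPIR-V is:
	//   %si = OpSampledImage %SampledImageType %image %sampler
	//   %r  = OpImageSampleImplicitLod %v4float %si %coord
	// where %image and %sampler are the values that words 3 and 4 of the
	// OpSampledImage definition resolve to above.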
	const Type &imageObjectType = spirv.getObjectType(imageId);
	const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
	                                     ? spirv.getType(imageObjectType.definition.word(2))
	                                     : imageObjectType;
	const Type &imageType = ((imageReferenceType.opcode() == spv::OpTypePointer)
	                             ? spirv.getType(imageReferenceType.element)
	                             : imageReferenceType);

	ASSERT(imageType.opcode() == spv::OpTypeImage);
	dim = imageType.definition.word(3);
	arrayed = imageType.definition.word(5);
	imageFormat = imageType.definition.word(8);

	const Object &coordinateObject = spirv.getObject(coordinateId);
	const Type &coordinateType = spirv.getType(coordinateObject);
	coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);

	if(samplerMethod == TexelPointer)
	{
		sampleId = insn.word(5);
		sample = !spirv.getObject(sampleId).isConstantZero();
	}

	if(isDref())
	{
		drefId = insn.word(5);
	}

	if(samplerMethod == Gather)
	{
		gatherComponent = !isDref() ? spirv.getObject(insn.word(5)).constantValue[0] : 0;
	}

	uint32_t operandsIndex = getImageOperandsIndex(insn);
	uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0;  // The mask which indicates which operands are provided.

	operandsIndex += 1;  // Advance to the first actual operand <id> location.

	if(imageOperands & spv::ImageOperandsBiasMask)
	{
		ASSERT(samplerMethod == Bias);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsBiasMask;
	}

	if(imageOperands & spv::ImageOperandsLodMask)
	{
		ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsLodMask;
	}

	if(imageOperands & spv::ImageOperandsGradMask)
	{
		ASSERT(samplerMethod == Grad);
		gradDxId = insn.word(operandsIndex + 0);
		gradDyId = insn.word(operandsIndex + 1);
		operandsIndex += 2;
		imageOperands &= ~spv::ImageOperandsGradMask;

		grad = spirv.getObjectType(gradDxId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsConstOffsetMask)
	{
		offsetId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsConstOffsetMask;

		offset = spirv.getObjectType(offsetId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsSampleMask)
	{
		ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
		sampleId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsSampleMask;

		sample = !spirv.getObject(sampleId).isConstantZero();
	}

	// TODO(b/174475384)
	if(imageOperands & spv::ImageOperandsZeroExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsZeroExtendMask;
	}
	else if(imageOperands & spv::ImageOperandsSignExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsSignExtendMask;
	}

	if(imageOperands != 0)
	{
		UNSUPPORTED("Image operands 0x%08X", imageOperands);
	}
}

SpirvShader::ImageInstructionSignature SpirvShader::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
{
	uint32_t imageOperands = getImageOperandsMask(insn);
	bool bias = imageOperands & spv::ImageOperandsBiasMask;
	bool grad = imageOperands & spv::ImageOperandsGradMask;

	switch(insn.opcode())
	{
	case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
	case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
	case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
	case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
	case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
	case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
	case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
	case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
	case spv::OpImageGather: return { None, Gather };
	case spv::OpImageDrefGather: return { Dref, Gather };
	case spv::OpImageFetch: return { None, Fetch };
	case spv::OpImageQueryLod: return { None, Query };
	case spv::OpImageRead: return { None, Read };
	case spv::OpImageWrite: return { None, Write };
	case spv::OpImageTexelPointer: return { None, TexelPointer };

	default:
		ASSERT(false);
		return { None, Implicit };
	}
}

// Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
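// For example, OpImageSampleImplicitLod is laid out as
//   word(1): Result Type <id>     word(2): Result <id>
//   word(3): Sampled Image <id>   word(4): Coordinate <id>
//   word(5): Image Operands mask (optional),
// while the Dref variants carry the extra Dref operand first, pushing the mask
// to word(6).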
uint32_t SpirvShader::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
{
	switch(insn.opcode())
	{
	case spv::OpImageSampleImplicitLod:
	case spv::OpImageSampleProjImplicitLod:
		return insn.wordCount() > 5 ? 5 : 0;  // Optional
	case spv::OpImageSampleExplicitLod:
	case spv::OpImageSampleProjExplicitLod:
		return 5;  // "Either Lod or Grad image operands must be present."
	case spv::OpImageSampleDrefImplicitLod:
	case spv::OpImageSampleProjDrefImplicitLod:
		return insn.wordCount() > 6 ? 6 : 0;  // Optional
	case spv::OpImageSampleDrefExplicitLod:
	case spv::OpImageSampleProjDrefExplicitLod:
		return 6;  // "Either Lod or Grad image operands must be present."
	case spv::OpImageGather:
	case spv::OpImageDrefGather:
		return insn.wordCount() > 6 ? 6 : 0;  // Optional
	case spv::OpImageFetch:
		return insn.wordCount() > 5 ? 5 : 0;  // Optional
	case spv::OpImageQueryLod:
		ASSERT(insn.wordCount() == 5);
		return 0;  // No image operands.
	case spv::OpImageRead:
		return insn.wordCount() > 5 ? 5 : 0;  // Optional
	case spv::OpImageWrite:
		return insn.wordCount() > 4 ? 4 : 0;  // Optional
	case spv::OpImageTexelPointer:
		ASSERT(insn.wordCount() == 6);
		return 0;  // No image operands.

	default:
		ASSERT(false);
		return 0;
	}
}

uint32_t SpirvShader::ImageInstruction::getImageOperandsMask(InsnIterator insn)
{
	uint32_t operandsIndex = getImageOperandsIndex(insn);
	return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
}

SpirvShader::EmitResult SpirvShader::EmitImageSample(const ImageInstruction &instruction, EmitState *state) const
{
	auto &resultType = getType(instruction.resultTypeId);
	auto &result = state->createIntermediate(instruction.resultId, resultType.componentCount);
	Array<SIMD::Float> out(4);

	// TODO(b/153380916): When we're in a code path that is always executed,
	// i.e. post-dominators of the entry block, we don't have to dynamically
	// check whether any lanes are active, and can elide the jump.
	If(AnyTrue(state->activeLaneMask()))
	{
		EmitImageSampleUnconditional(out, instruction, state);
	}

	for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }

	return EmitResult::Continue;
}

void SpirvShader::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const
{
	Pointer<Byte> imageDescriptor = state->getPointer(instruction.imageId).base;  // vk::SampledImageDescriptor*

	Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, instruction, state);

	callSamplerFunction(samplerFunction, out, imageDescriptor, instruction, state);
}

Pointer<Byte> SpirvShader::lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
	Int samplerId = 0;

	if(instruction.samplerId != 0)
	{
		Pointer<Byte> samplerDescriptor = state->getPointer(instruction.samplerId).base;  // vk::SampledImageDescriptor*

		samplerId = *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId));  // vk::Sampler::id
	}

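	// Each image instruction has its own cache entry, keyed by its position in
	// the SPIR-V binary, holding the sampler routine it last resolved, so the
	// call to getImageSampler below is skipped whenever the bound descriptor
	// and sampler are unchanged.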
	auto &cache = state->routine->samplerCache.at(instruction.position);
	Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId);  // TODO(b/205566405): Skip sampler ID check for samplerless instructions.

	If(!cacheHit)
	{
		rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
		cache.function = Call(getImageSampler, state->routine->device, instruction.signature, samplerId, imageViewId);
		cache.imageDescriptor = imageDescriptor;
		cache.samplerId = samplerId;
	}

	return cache.function;
}

void SpirvShader::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
	Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.
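	// The sampler routine takes its parameters as one flat array of SIMD
	// components, packed in the order appended below: coordinates, then (when
	// present) the Dref value, lod/bias or gradients, constant offsets, and
	// the sample index. Integer values are bit-cast for the transfer.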

	auto coordinate = Operand(this, state, instruction.coordinateId);

	uint32_t i = 0;
	for(; i < instruction.coordinates; i++)
	{
		if(instruction.isProj())
		{
			in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
		}
		else
		{
			in[i] = coordinate.Float(i);
		}
	}

	if(instruction.isDref())
	{
		auto drefValue = Operand(this, state, instruction.drefId);

		if(instruction.isProj())
		{
			in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
		}
		else
		{
			in[i] = drefValue.Float(0);
		}

		i++;
	}

	if(instruction.lodOrBiasId != 0)
	{
		auto lodValue = Operand(this, state, instruction.lodOrBiasId);
		in[i] = lodValue.Float(0);
		i++;
	}
	else if(instruction.gradDxId != 0)
	{
		auto dxValue = Operand(this, state, instruction.gradDxId);
		auto dyValue = Operand(this, state, instruction.gradDyId);
		ASSERT(dxValue.componentCount == dyValue.componentCount);

		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
		{
			in[i] = dxValue.Float(j);
		}

		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
		{
			in[i] = dyValue.Float(j);
		}
	}
	else if(instruction.samplerMethod == Fetch)
	{
		// The instruction didn't provide a lod operand, but the sampler's Fetch
		// function requires one to be present. If no lod is supplied, the default
		// is zero.
		in[i] = As<SIMD::Float>(SIMD::Int(0));
		i++;
	}

	if(instruction.offsetId != 0)
	{
		auto offsetValue = Operand(this, state, instruction.offsetId);

		for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
		{
			in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transferred as float.
		}
	}

	if(instruction.sample)
	{
		auto sampleValue = Operand(this, state, instruction.sampleId);
		in[i] = As<SIMD::Float>(sampleValue.Int(0));
	}

	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);  // sw::Texture*

	Call<ImageSampler>(samplerFunction, texture, &in, &out, state->routine->constants);
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
{
	auto &resultTy = getType(insn.resultTypeId());
	auto imageId = Object::ID(insn.word(3));
	auto lodId = Object::ID(insn.word(4));

	auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
	GetImageDimensions(state, resultTy, imageId, lodId, dst);

	return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
{
	auto &resultTy = getType(insn.resultTypeId());
	auto imageId = Object::ID(insn.word(3));
	auto lodId = Object::ID(0);

	auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
	GetImageDimensions(state, resultTy, imageId, lodId, dst);

	return EmitResult::Continue;
}

void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
	auto routine = state->routine;
	auto &image = getObject(imageId);
	auto &imageType = getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	bool isArrayed = imageType.definition.word(5) != 0;
	uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);

	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = state->getPointer(imageId).base;

	Int width;
	Int height;
	Int depth;

	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	if(lodId != 0)
	{
		auto lodVal = Operand(this, state, lodId);
		ASSERT(lodVal.componentCount == 1);
		auto lod = lodVal.Int(0);
		auto one = SIMD::Int(1);

		if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
		if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
		if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
	}
	else
	{
		if(dimensions >= 1) dst.move(0, SIMD::Int(width));
		if(dimensions >= 2) dst.move(1, SIMD::Int(height));
		if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
	}

	if(isArrayed)
	{
		dst.move(dimensions, SIMD::Int(depth));
	}
}

SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
{
	auto &resultTy = getType(insn.resultTypeId());
	ASSERT(resultTy.componentCount == 1);
	auto imageId = Object::ID(insn.word(3));

	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
	auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = state->getPointer(imageId).base;
	Int mipLevels = 0;
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	auto &dst = state->createIntermediate(insn.resultId(), 1);
	dst.move(0, SIMD::Int(mipLevels));

	return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
{
	auto &resultTy = getType(insn.resultTypeId());
	ASSERT(resultTy.componentCount == 1);
	auto imageId = Object::ID(insn.word(3));
	auto imageTy = getObjectType(imageId);
	ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
	ASSERT(imageTy.definition.word(3) == spv::Dim2D);
	ASSERT(imageTy.definition.word(6 /* MS */) == 1);

	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
	auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = state->getPointer(imageId).base;
	Int sampleCount = 0;
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	auto &dst = state->createIntermediate(insn.resultId(), 1);
	dst.move(0, SIMD::Int(sampleCount));

	return EmitResult::Continue;
}

SIMD::Pointer SpirvShader::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state)
{
	bool isArrayed = instruction.arrayed;
	spv::Dim dim = static_cast<spv::Dim>(instruction.dim);
	int dims = instruction.coordinates - (isArrayed ? 1 : 0);

	SIMD::Int u = coordinate[0];
	SIMD::Int v = SIMD::Int(0);

	if(dims > 1)
	{
		v = coordinate[1];
	}

	if(dim == spv::DimSubpassData)
	{
		u += state->routine->windowSpacePosition[0];
		v += state->routine->windowSpacePosition[1];
	}

	const int texelSize = imageFormat.bytes();
	const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
	auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
	                                                          ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
	                                                          : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
	auto slicePitch = SIMD::Int(
	    *Pointer<Int>(descriptor + (useStencilAspect
	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
	                                    : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
	auto samplePitch = SIMD::Int(
	    *Pointer<Int>(descriptor + (useStencilAspect
	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
	                                    : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));

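	// The per-lane byte offset of a texel is accumulated below as
	//   u * texelSize + v * rowPitch + (w + layer) * slicePitch + sample * samplePitch,
	// with each term added only when the corresponding dimension applies.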
	SIMD::Int ptrOffset = u * SIMD::Int(texelSize);

	if(dims > 1)
	{
		ptrOffset += v * rowPitch;
	}

	SIMD::Int w = 0;
	if((dims > 2) || isArrayed)
	{
		if(dims > 2)
		{
			w += coordinate[2];
		}

		if(isArrayed)
		{
			w += coordinate[dims];
		}

		ptrOffset += w * slicePitch;
	}

	if(dim == spv::DimSubpassData)
	{
		// Multiview input attachment access is to the layer corresponding to the current view
		ptrOffset += SIMD::Int(state->routine->viewID) * slicePitch;
	}

	if(instruction.sample)
	{
		ptrOffset += sample * samplePitch;
	}

	// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
	// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
	if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
	{
		SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
		SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(u), width));

		if(dims > 1)
		{
			SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(v), height));
		}

		if((dims > 2) || isArrayed)
		{
			UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
			if(dim == spv::DimCube) { depth *= 6; }
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(w), SIMD::UInt(depth)));
		}

		if(instruction.sample)
		{
			SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
		}

		constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
		static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");

		ptrOffset = (ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
	}

	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
	                                                                    ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
	                                                                    : OFFSET(vk::StorageImageDescriptor, ptr)));

	Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));

	return SIMD::Pointer(imageBase, imageSizeInBytes, ptrOffset);
}

SpirvShader::EmitResult SpirvShader::EmitImageRead(const ImageInstruction &instruction, EmitState *state) const
{
	auto &resultType = getObjectType(instruction.resultId);
	auto &image = getObject(instruction.imageId);
	auto &imageType = getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	auto dim = static_cast<spv::Dim>(instruction.dim);

	auto coordinate = Operand(this, state, instruction.coordinateId);
	const DescriptorDecorations &d = descriptorDecorations.at(instruction.imageId);

	// For subpass data, the format in the instruction is spv::ImageFormatUnknown. Get it from
	// the renderpass data instead. In all other cases, we can use the format in the instruction.
	vk::Format imageFormat = (dim == spv::DimSubpassData)
	                             ? inputAttachmentFormats[d.InputAttachmentIndex]
	                             : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

	// Depth+stencil image attachments select the aspect based on the Sampled Type of the
	// OpTypeImage. If float, we want the depth aspect. If int, we want the stencil aspect.
	bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
	                         getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);

	if(useStencilAspect)
	{
		imageFormat = VK_FORMAT_S8_UINT;
	}

	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*
	auto &dst = state->createIntermediate(instruction.resultId, resultType.componentCount);

	// VK_EXT_image_robustness requires replacing out-of-bounds accesses with zero.
	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
	auto robustness = OutOfBoundsBehavior::Nullify;

	SIMD::Int uvwa[4];
	SIMD::Int sample;

	for(uint32_t i = 0; i < instruction.coordinates; i++)
	{
		uvwa[i] = coordinate.Int(i);
	}

	if(instruction.sample)
	{
		sample = Operand(this, state, instruction.sampleId).Int(0);
	}

	auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

	const int texelSize = imageFormat.bytes();

	// Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
	// TODO(b/160531165): Provide gather abstractions for various element sizes.
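	// For example, an 8-byte texel (R16G16B16A16) fills packed[0..1] with two
	// consecutive 4-byte gathers, and a 16-byte texel fills packed[0..3].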
	SIMD::Int packed[4];
	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
	{
		for(auto i = 0; i < texelSize / 4; i++)
		{
			packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask());
			texelPtr += sizeof(float);
		}
	}
	else if(texelSize == 2)
	{
		SIMD::Int offsets = texelPtr.offsets();
		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.base + Extract(offsets, i))), i);
			}
		}
	}
	else if(texelSize == 1)
	{
		SIMD::Int offsets = texelPtr.offsets();
		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.base + Extract(offsets, i))), i);
			}
		}
	}
	else
	{
		UNREACHABLE("texelSize: %d", int(texelSize));
	}

	// Format support requirements here come from two sources:
	// - Minimum required set of formats for loads from storage images
	// - Any format supported as a color or depth/stencil attachment, for input attachments
	switch(imageFormat)
	{
	case VK_FORMAT_R32G32B32A32_SFLOAT:
	case VK_FORMAT_R32G32B32A32_SINT:
	case VK_FORMAT_R32G32B32A32_UINT:
		dst.move(0, packed[0]);
		dst.move(1, packed[1]);
		dst.move(2, packed[2]);
		dst.move(3, packed[3]);
		break;
	case VK_FORMAT_R32_SINT:
	case VK_FORMAT_R32_UINT:
		dst.move(0, packed[0]);
		// Fill remaining channels with 0,0,1 (of the correct type)
		dst.move(1, SIMD::Int(0));
		dst.move(2, SIMD::Int(0));
		dst.move(3, SIMD::Int(1));
		break;
	case VK_FORMAT_R32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		dst.move(0, packed[0]);
		// Fill remaining channels with 0,0,1 (of the correct type)
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_D16_UNORM:
		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16G16B16A16_UNORM:
		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		break;
	case VK_FORMAT_R16G16B16A16_SNORM:
		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		break;
	case VK_FORMAT_R16G16B16A16_SINT:
		dst.move(0, (packed[0] << 16) >> 16);
		dst.move(1, packed[0] >> 16);
		dst.move(2, (packed[1] << 16) >> 16);
		dst.move(3, packed[1] >> 16);
		break;
	case VK_FORMAT_R16G16B16A16_UINT:
		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
		dst.move(2, packed[1] & SIMD::Int(0xFFFF));
		dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
		break;
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
		dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
		dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
		break;
	case VK_FORMAT_R8G8B8A8_SNORM:
	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		break;
	case VK_FORMAT_R8G8B8A8_UNORM:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		break;
	case VK_FORMAT_R8G8B8A8_SRGB:
	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
		dst.move(0, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
		dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
		dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		break;
	case VK_FORMAT_B8G8R8A8_UNORM:
		dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		break;
	case VK_FORMAT_B8G8R8A8_SRGB:
		dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
		dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
		dst.move(2, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		break;
	case VK_FORMAT_R8G8B8A8_UINT:
	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
		dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
		dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
		dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
		break;
	case VK_FORMAT_R8G8B8A8_SINT:
	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
		dst.move(0, (packed[0] << 24) >> 24);
		dst.move(1, (packed[0] << 16) >> 24);
		dst.move(2, (packed[0] << 8) >> 24);
		dst.move(3, packed[0] >> 24);
		break;
	case VK_FORMAT_R8_UNORM:
		dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R8_SNORM:
		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R8_UINT:
	case VK_FORMAT_S8_UINT:
		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
		dst.move(1, SIMD::UInt(0));
		dst.move(2, SIMD::UInt(0));
		dst.move(3, SIMD::UInt(1));
		break;
	case VK_FORMAT_R8_SINT:
		dst.move(0, (packed[0] << 24) >> 24);
		dst.move(1, SIMD::Int(0));
		dst.move(2, SIMD::Int(0));
		dst.move(3, SIMD::Int(1));
		break;
	case VK_FORMAT_R8G8_UNORM:
		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R8G8_SNORM:
		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R8G8_UINT:
		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
		dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
		dst.move(2, SIMD::UInt(0));
		dst.move(3, SIMD::UInt(1));
		break;
	case VK_FORMAT_R8G8_SINT:
		dst.move(0, (packed[0] << 24) >> 24);
		dst.move(1, (packed[0] << 16) >> 24);
		dst.move(2, SIMD::Int(0));
		dst.move(3, SIMD::Int(1));
		break;
	case VK_FORMAT_R16_SFLOAT:
		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16_UNORM:
		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16_SNORM:
		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		dst.move(1, SIMD::Float(0.0f));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16_UINT:
		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
		dst.move(1, SIMD::UInt(0));
		dst.move(2, SIMD::UInt(0));
		dst.move(3, SIMD::UInt(1));
		break;
	case VK_FORMAT_R16_SINT:
		dst.move(0, (packed[0] << 16) >> 16);
		dst.move(1, SIMD::Int(0));
		dst.move(2, SIMD::Int(0));
		dst.move(3, SIMD::Int(1));
		break;
	case VK_FORMAT_R16G16_SFLOAT:
		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16G16_UNORM:
		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16G16_SNORM:
		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R16G16_UINT:
		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
		dst.move(2, SIMD::UInt(0));
		dst.move(3, SIMD::UInt(1));
		break;
	case VK_FORMAT_R16G16_SINT:
		dst.move(0, (packed[0] << 16) >> 16);
		dst.move(1, packed[0] >> 16);
		dst.move(2, SIMD::Int(0));
		dst.move(3, SIMD::Int(1));
		break;
	case VK_FORMAT_R32G32_SINT:
	case VK_FORMAT_R32G32_UINT:
		dst.move(0, packed[0]);
		dst.move(1, packed[1]);
		dst.move(2, SIMD::Int(0));
		dst.move(3, SIMD::Int(1));
		break;
	case VK_FORMAT_R32G32_SFLOAT:
		dst.move(0, packed[0]);
		dst.move(1, packed[1]);
		dst.move(2, SIMD::Float(0.0f));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
		dst.move(0, packed[0] & SIMD::Int(0x3FF));
		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
		dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
		break;
	case VK_FORMAT_A2R10G10B10_UINT_PACK32:
		dst.move(2, packed[0] & SIMD::Int(0x3FF));
		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
		dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
		break;
	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
		dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
		break;
	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
		dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
		break;
	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		break;
	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		break;
	case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
		dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		break;
	case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
		break;
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_B5G6R5_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
		dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(3, SIMD::Float(1.0f));
		break;
	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
		break;
	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
		break;
	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
		dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
		dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
		break;
	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
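		// The 11- and 10-bit unsigned floats share the 5-bit exponent and bias
		// of half floats, so each field is shifted into the half-float bit
		// layout and widened with halfToFloatBits().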
		dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
		dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
		dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
		dst.move(3, SIMD::Float(1.0f));
		break;
	default:
		UNSUPPORTED("VkFormat %d", int(imageFormat));
		break;
	}

	return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const
{
	imageWriteEmitted = true;

	auto &image = getObject(instruction.imageId);
	auto &imageType = getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData);  // "Its Dim operand must not be SubpassData."

	auto coordinate = Operand(this, state, instruction.coordinateId);
	auto texel = Operand(this, state, instruction.texelId);

	Array<SIMD::Int> coord(5);  // uvwa & sample

	uint32_t i = 0;
	for(; i < instruction.coordinates; i++)
	{
		coord[i] = coordinate.Int(i);
	}

	if(instruction.sample)
	{
		coord[i] = Operand(this, state, instruction.sampleId).Int(0);
	}

	Array<SIMD::Int> texelAndMask(5);
	texelAndMask[0] = texel.Int(0);
	texelAndMask[1] = texel.Int(1);
	texelAndMask[2] = texel.Int(2);
	texelAndMask[3] = texel.Int(3);
	texelAndMask[4] = state->activeStoresAndAtomicsMask();

	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*

	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

	if(imageFormat == VK_FORMAT_UNDEFINED)  // spv::ImageFormatUnknown
	{
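		// The actual memory format is only known at runtime in this case, so
		// defer to a sampler routine specialized on the image view's format;
		// known formats are packed inline by WriteImage() below.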
		Pointer<Byte> samplerFunction = lookupSamplerFunction(descriptor, instruction, state);

		Call<ImageSampler>(samplerFunction, descriptor, &coord, &texelAndMask, state->routine->constants);
	}
	else
	{
		WriteImage(instruction, descriptor, &coord, &texelAndMask, imageFormat);
	}

	return EmitResult::Continue;
}

WriteImage(ImageInstructionSignature instruction,Pointer<Byte> descriptor,const Pointer<SIMD::Int> & coord,const Pointer<SIMD::Int> & texelAndMask,vk::Format imageFormat)1178 void SpirvShader::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
1179 {
1180 	SIMD::Int texel[4];
1181 	texel[0] = texelAndMask[0];
1182 	texel[1] = texelAndMask[1];
1183 	texel[2] = texelAndMask[2];
1184 	texel[3] = texelAndMask[3];
1185 	SIMD::Int mask = texelAndMask[4];
1186 
1187 	SIMD::Int packed[4];
1188 	switch(imageFormat)
1189 	{
1190 	case VK_FORMAT_R32G32B32A32_SFLOAT:
1191 	case VK_FORMAT_R32G32B32A32_SINT:
1192 	case VK_FORMAT_R32G32B32A32_UINT:
1193 		packed[0] = texel[0];
1194 		packed[1] = texel[1];
1195 		packed[2] = texel[2];
1196 		packed[3] = texel[3];
1197 		break;
1198 	case VK_FORMAT_R32_SFLOAT:
1199 	case VK_FORMAT_R32_SINT:
1200 	case VK_FORMAT_R32_UINT:
1201 		packed[0] = texel[0];
1202 		break;
1203 	case VK_FORMAT_R8G8B8A8_UNORM:
1204 	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1205 		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1206 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1207 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1208 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1209 		break;
1210 	case VK_FORMAT_B8G8R8A8_UNORM:
1211 		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1212 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1213 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1214 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1215 		break;
1216 	case VK_FORMAT_R8G8B8A8_SNORM:
1217 	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
1218 		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1219 		             SIMD::Int(0xFF)) |
1220 		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1221 		              SIMD::Int(0xFF))
1222 		             << 8) |
1223 		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1224 		              SIMD::Int(0xFF))
1225 		             << 16) |
1226 		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1227 		              SIMD::Int(0xFF))
1228 		             << 24);
1229 		break;
1230 	case VK_FORMAT_R8G8B8A8_SINT:
1231 	case VK_FORMAT_R8G8B8A8_UINT:
1232 	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1233 	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1234 		packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xff))) |
1235 		            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xff)) << 8) |
1236 		            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xff)) << 16) |
1237 		            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xff)) << 24);
1238 		break;
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
		packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
		break;
	case VK_FORMAT_R16G16B16A16_SINT:
	case VK_FORMAT_R16G16B16A16_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
		packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
		break;
	case VK_FORMAT_R32G32_SFLOAT:
	case VK_FORMAT_R32G32_SINT:
	case VK_FORMAT_R32G32_UINT:
		packed[0] = texel[0];
		packed[1] = texel[1];
		break;
	case VK_FORMAT_R16G16_SFLOAT:
		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
		break;
	case VK_FORMAT_R16G16_SINT:
	case VK_FORMAT_R16G16_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
		break;
	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
		// Truncates instead of rounding. See b/147900455
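		// A half-float is 1 sign, 5 exponent and 10 mantissa bits. The unsigned
		// 11- and 10-bit floats share the 5-bit exponent, so each packed value is
		// the half-float bit pattern with the mantissa truncated to 6 bits (R, G)
		// or 5 bits (B), repositioned into its field of the 32-bit word.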
		packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
		            ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
		            ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
		break;
	case VK_FORMAT_R16_SFLOAT:
		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
		break;
	case VK_FORMAT_R16G16B16A16_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
		packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
		break;
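	// RGB components are quantized to 10 bits (scale by 0x3FF) and alpha to
	// 2 bits (scale by 0x3): R occupies bits 0-9, G 10-19, B 20-29, A 30-31.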
	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
		break;
	case VK_FORMAT_R16G16_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
		break;
	case VK_FORMAT_R8G8_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
		break;
	case VK_FORMAT_R16_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
		break;
	case VK_FORMAT_R8_UNORM:
		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
		break;
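	// For two-component SNORM packing only the low component needs masking;
	// the << 16 shift discards the upper component's sign-extension bits.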
	case VK_FORMAT_R16G16B16A16_SNORM:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
		packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
		break;
	case VK_FORMAT_R16G16_SNORM:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
		break;
	case VK_FORMAT_R8G8_SNORM:
		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
		break;
	case VK_FORMAT_R16_SNORM:
		packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
		break;
	case VK_FORMAT_R8_SNORM:
		packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
		break;
	case VK_FORMAT_R8G8_SINT:
	case VK_FORMAT_R8G8_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
		break;
	case VK_FORMAT_R16_SINT:
	case VK_FORMAT_R16_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
		break;
	case VK_FORMAT_R8_SINT:
	case VK_FORMAT_R8_UINT:
		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
		break;
	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
		packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
		            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
		break;
	default:
		UNSUPPORTED("VkFormat %d", int(imageFormat));
		break;
	}

	// "The integer texel coordinates are validated according to the same rules as for texel input coordinate
	//  validation. If the texel fails integer texel coordinate validation, then the write has no effect."
	// - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
	auto robustness = OutOfBoundsBehavior::Nullify;
	// GetTexelAddress() only needs the EmitState* for SubpassData accesses (i.e. input attachments).
	const EmitState *state = nullptr;

	SIMD::Int uvwa[4];
	SIMD::Int sample;

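	// Image writes use unnormalized integer texel coordinates; reinterpret the
	// raw coordinate bits as integers.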
	uint32_t i = 0;
	for(; i < instruction.coordinates; i++)
	{
		uvwa[i] = As<SIMD::Int>(coord[i]);
	}

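	// For multisampled images, the sample index follows the coordinate components.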
	if(instruction.sample)
	{
		sample = As<SIMD::Int>(coord[i]);
	}

	auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

	const int texelSize = imageFormat.bytes();

	// Scatter packed texel data.
	// TODO(b/160531165): Provide scatter abstractions for various element sizes.
	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
	{
		for(auto i = 0; i < texelSize / 4; i++)
		{
			texelPtr.Store(packed[i], robustness, mask);
			texelPtr += sizeof(float);
		}
	}
	else if(texelSize == 2)
	{
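		// No masked 16-bit scatter abstraction yet (see TODO above), so store
		// each enabled lane individually.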
		SIMD::Int offsets = texelPtr.offsets();
		mask = mask & texelPtr.isInBounds(2, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				*Pointer<Short>(texelPtr.base + Extract(offsets, i)) = Short(Extract(packed[0], i));
			}
		}
	}
	else if(texelSize == 1)
	{
		SIMD::Int offsets = texelPtr.offsets();
		mask = mask & texelPtr.isInBounds(1, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				*Pointer<Byte>(texelPtr.base + Extract(offsets, i)) = Byte(Extract(packed[0], i));
			}
		}
	}
	else
	{
		UNREACHABLE("texelSize: %d", int(texelSize));
	}
}

SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const
{
	auto coordinate = Operand(this, state, instruction.coordinateId);

	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*

	// VK_EXT_image_robustness requires checking for out-of-bounds accesses.
	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
	auto robustness = OutOfBoundsBehavior::Nullify;
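	// The image format is declared on the SPIR-V image type; map it to the
	// equivalent Vulkan format.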
	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

	SIMD::Int uvwa[4];

	for(uint32_t i = 0; i < instruction.coordinates; i++)
	{
		uvwa[i] = coordinate.Int(i);
	}

	SIMD::Int sample = Operand(this, state, instruction.sampleId).Int(0);

	auto ptr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

	state->createPointer(instruction.resultId, ptr);

	return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
{
	// Propagate the image pointer in both cases.
	// Consumers of OpSampledImage will look through to find the sampler pointer.

	Object::ID resultId = insn.word(2);
	Object::ID imageId = insn.word(3);

	state->createPointer(resultId, state->getPointer(imageId));

	return EmitResult::Continue;
}

}  // namespace sw