// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include "System/Types.hpp"

#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkPipelineLayout.hpp"

#include <spirv/unified1/spirv.hpp>

namespace sw {

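// Maps a SPIR-V image format, as declared on an OpTypeImage, to the
// corresponding Vulkan format. Formats without a mapping are unsupported.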
static vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
{
    switch(format)
    {
    case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
    case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
    case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
    case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
    case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
    case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
    case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
    case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
    case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
    case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
    case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
    case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
    case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
    case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
    case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
    case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
    case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
    case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
    case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
    case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
    case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
    case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
    case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
    case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
    case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
    case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
    case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
    case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
    case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
    case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
    case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
    case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
    case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
    case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
    case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
    case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
    case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
    case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
    case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
    case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;

    default:
        UNSUPPORTED("SPIR-V ImageFormat %u", format);
        return VK_FORMAT_UNDEFINED;
    }
}

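// Decodes an image instruction into a compact signature (variant, method,
// dimensionality, format, etc.) plus the IDs of the operands it references.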
SpirvShader::ImageInstruction::ImageInstruction(InsnIterator insn, const SpirvShader &spirv)
    : ImageInstructionSignature(parseVariantAndMethod(insn))
    , position(insn.distanceFrom(spirv.begin()))
{
    if(samplerMethod == Write)
    {
        imageId = insn.word(1);
        coordinateId = insn.word(2);
        texelId = insn.word(3);
    }
    else
    {
        resultTypeId = insn.resultTypeId();  // word(1)
        resultId = insn.resultId();          // word(2)

        if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer)  // Samplerless
        {
            imageId = insn.word(3);
        }
        else
        {
            Object::ID sampledImageId = insn.word(3);
            const Object &sampledImage = spirv.getObject(sampledImageId);

            if(sampledImage.opcode() == spv::OpSampledImage)
            {
                imageId = sampledImage.definition.word(3);
                samplerId = sampledImage.definition.word(4);
            }
            else  // Combined image/sampler
            {
                imageId = sampledImageId;
                samplerId = sampledImageId;
            }
        }

        coordinateId = insn.word(4);
    }

    // `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
    // To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
    const Type &imageObjectType = spirv.getObjectType(imageId);
    const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
                                         ? spirv.getType(imageObjectType.definition.word(2))
                                         : imageObjectType;
    const Type &imageType = (imageReferenceType.opcode() == spv::OpTypePointer)
                                ? spirv.getType(imageReferenceType.element)
                                : imageReferenceType;

    ASSERT(imageType.opcode() == spv::OpTypeImage);
    dim = imageType.definition.word(3);
    arrayed = imageType.definition.word(5);
    imageFormat = imageType.definition.word(8);

    const Object &coordinateObject = spirv.getObject(coordinateId);
    const Type &coordinateType = spirv.getType(coordinateObject);
    coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);

    if(samplerMethod == TexelPointer)
    {
        sampleId = insn.word(5);
        sample = !spirv.getObject(sampleId).isConstantZero();
    }

    if(isDref())
    {
        drefId = insn.word(5);
    }

    if(samplerMethod == Gather)
    {
        gatherComponent = !isDref() ? spirv.getObject(insn.word(5)).constantValue[0] : 0;
    }

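    // Decode the optional Image Operands. Per the SPIR-V spec, the extra
    // operand <id>s follow the mask word in the order of the mask's bits,
    // from least to most significant; each handled bit is cleared so that
    // leftover bits can be flagged as unsupported at the end.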
    uint32_t operandsIndex = getImageOperandsIndex(insn);
    uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0;  // The mask which indicates which operands are provided.

    operandsIndex += 1;  // Advance to the first actual operand <id> location.

    if(imageOperands & spv::ImageOperandsBiasMask)
    {
        ASSERT(samplerMethod == Bias);
        lodOrBiasId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsBiasMask;
    }

    if(imageOperands & spv::ImageOperandsLodMask)
    {
        ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
        lodOrBiasId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsLodMask;
    }

    if(imageOperands & spv::ImageOperandsGradMask)
    {
        ASSERT(samplerMethod == Grad);
        gradDxId = insn.word(operandsIndex + 0);
        gradDyId = insn.word(operandsIndex + 1);
        operandsIndex += 2;
        imageOperands &= ~spv::ImageOperandsGradMask;

        grad = spirv.getObjectType(gradDxId).componentCount;
    }

    if(imageOperands & spv::ImageOperandsConstOffsetMask)
    {
        offsetId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsConstOffsetMask;

        offset = spirv.getObjectType(offsetId).componentCount;
    }

    if(imageOperands & spv::ImageOperandsSampleMask)
    {
        ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
        sampleId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsSampleMask;

        sample = !spirv.getObject(sampleId).isConstantZero();
    }

    // TODO(b/174475384)
    if(imageOperands & spv::ImageOperandsZeroExtendMask)
    {
        ASSERT(samplerMethod == Read || samplerMethod == Write);
        imageOperands &= ~spv::ImageOperandsZeroExtendMask;
    }
    else if(imageOperands & spv::ImageOperandsSignExtendMask)
    {
        ASSERT(samplerMethod == Read || samplerMethod == Write);
        imageOperands &= ~spv::ImageOperandsSignExtendMask;
    }

    [[maybe_unused]] spv::Scope scope = spv::ScopeCrossDevice;  // "Whilst the CrossDevice scope is defined in SPIR-V, it is disallowed in Vulkan."

    if(imageOperands & spv::ImageOperandsMakeTexelAvailableMask)
    {
        scope = static_cast<spv::Scope>(insn.word(operandsIndex));
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsMakeTexelAvailableMask;
    }

    if(imageOperands & spv::ImageOperandsMakeTexelVisibleMask)
    {
        scope = static_cast<spv::Scope>(insn.word(operandsIndex));
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsMakeTexelVisibleMask;
    }

    if(imageOperands & spv::ImageOperandsNonPrivateTexelMask)
    {
        imageOperands &= ~spv::ImageOperandsNonPrivateTexelMask;
    }

    if(imageOperands & spv::ImageOperandsVolatileTexelMask)
    {
        UNIMPLEMENTED("b/176819536");
        imageOperands &= ~spv::ImageOperandsVolatileTexelMask;
    }

    // There should be no remaining image operands.
    if(imageOperands != 0)
    {
        UNSUPPORTED("Image operands 0x%08X", imageOperands);
    }
}

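// Classifies an image instruction by its sampling variant (Dref/Proj) and
// method. For the sampling opcodes, a Bias or Grad image operand refines the
// implicit- or explicit-lod method.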
SpirvShader::ImageInstructionSignature SpirvShader::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
{
    uint32_t imageOperands = getImageOperandsMask(insn);
    bool bias = imageOperands & spv::ImageOperandsBiasMask;
    bool grad = imageOperands & spv::ImageOperandsGradMask;

    switch(insn.opcode())
    {
    case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
    case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
    case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
    case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
    case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
    case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
    case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
    case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
    case spv::OpImageGather: return { None, Gather };
    case spv::OpImageDrefGather: return { Dref, Gather };
    case spv::OpImageFetch: return { None, Fetch };
    case spv::OpImageQueryLod: return { None, Query };
    case spv::OpImageRead: return { None, Read };
    case spv::OpImageWrite: return { None, Write };
    case spv::OpImageTexelPointer: return { None, TexelPointer };

    default:
        ASSERT(false);
        return { None, Implicit };
    }
}

// Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
uint32_t SpirvShader::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
{
    switch(insn.opcode())
    {
    case spv::OpImageSampleImplicitLod:
    case spv::OpImageSampleProjImplicitLod:
        return insn.wordCount() > 5 ? 5 : 0;  // Optional
    case spv::OpImageSampleExplicitLod:
    case spv::OpImageSampleProjExplicitLod:
        return 5;  // "Either Lod or Grad image operands must be present."
    case spv::OpImageSampleDrefImplicitLod:
    case spv::OpImageSampleProjDrefImplicitLod:
        return insn.wordCount() > 6 ? 6 : 0;  // Optional
    case spv::OpImageSampleDrefExplicitLod:
    case spv::OpImageSampleProjDrefExplicitLod:
        return 6;  // "Either Lod or Grad image operands must be present."
    case spv::OpImageGather:
    case spv::OpImageDrefGather:
        return insn.wordCount() > 6 ? 6 : 0;  // Optional
    case spv::OpImageFetch:
        return insn.wordCount() > 5 ? 5 : 0;  // Optional
    case spv::OpImageQueryLod:
        ASSERT(insn.wordCount() == 5);
        return 0;  // No image operands.
    case spv::OpImageRead:
        return insn.wordCount() > 5 ? 5 : 0;  // Optional
    case spv::OpImageWrite:
        return insn.wordCount() > 4 ? 4 : 0;  // Optional
    case spv::OpImageTexelPointer:
        ASSERT(insn.wordCount() == 6);
        return 0;  // No image operands.

    default:
        ASSERT(false);
        return 0;
    }
}

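// Returns the Image Operands mask word, or 0 when the instruction has no image operands.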
uint32_t SpirvShader::ImageInstruction::getImageOperandsMask(InsnIterator insn)
{
    uint32_t operandsIndex = getImageOperandsIndex(insn);
    return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
}

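// Emits an image sampling instruction. The actual sampling is performed by a
// routine specialized for the image/sampler pair, invoked only when at least
// one SIMD lane is active.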
SpirvShader::EmitResult SpirvShader::EmitImageSample(const ImageInstruction &instruction, EmitState *state) const
{
    auto &resultType = getType(instruction.resultTypeId);
    auto &result = state->createIntermediate(instruction.resultId, resultType.componentCount);
    Array<SIMD::Float> out(4);

    // TODO(b/153380916): When we're in a code path that is always executed,
    // i.e. post-dominators of the entry block, we don't have to dynamically
    // check whether any lanes are active, and can elide the jump.
    If(AnyTrue(state->activeLaneMask()))
    {
        EmitImageSampleUnconditional(out, instruction, state);
    }

    for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }

    return EmitResult::Continue;
}

void SpirvShader::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const
{
    Pointer<Byte> imageDescriptor = state->getPointer(instruction.imageId).base;  // vk::SampledImageDescriptor*

    Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, instruction, state);

    callSamplerFunction(samplerFunction, out, imageDescriptor, instruction, state);
}

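// Returns a pointer to the sampling routine for this instruction. A cache,
// keyed on the call site, avoids regenerating the routine when the same
// image descriptor and sampler are bound again.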
Pointer<Byte> SpirvShader::lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
    Int samplerId = 0;

    if(instruction.samplerId != 0)
    {
        Pointer<Byte> samplerDescriptor = state->getPointer(instruction.samplerId).base;  // vk::SampledImageDescriptor*

        samplerId = *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId));  // vk::Sampler::id
    }

    auto &cache = state->routine->samplerCache.at(instruction.position);
    Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId);  // TODO(b/205566405): Skip sampler ID check for samplerless instructions.

    If(!cacheHit)
    {
        rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
        cache.function = Call(getImageSampler, state->routine->device, instruction.signature, samplerId, imageViewId);
        cache.imageDescriptor = imageDescriptor;
        cache.samplerId = samplerId;
    }

    return cache.function;
}

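// Marshals the sampler routine's inputs: coordinates, then the optional Dref
// value, lod/bias or gradients, constant offsets, and sample index, packed
// into a flat array of SIMD registers.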
void SpirvShader::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
    Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.

    auto coordinate = Operand(this, state, instruction.coordinateId);

    uint32_t i = 0;
    for(; i < instruction.coordinates; i++)
    {
        if(instruction.isProj())
        {
            in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
        }
        else
        {
            in[i] = coordinate.Float(i);
        }
    }

    if(instruction.isDref())
    {
        auto drefValue = Operand(this, state, instruction.drefId);

        if(instruction.isProj())
        {
            in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
        }
        else
        {
            in[i] = drefValue.Float(0);
        }

        i++;
    }

    if(instruction.lodOrBiasId != 0)
    {
        auto lodValue = Operand(this, state, instruction.lodOrBiasId);
        in[i] = lodValue.Float(0);
        i++;
    }
    else if(instruction.gradDxId != 0)
    {
        auto dxValue = Operand(this, state, instruction.gradDxId);
        auto dyValue = Operand(this, state, instruction.gradDyId);
        ASSERT(dxValue.componentCount == dyValue.componentCount);

        for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
        {
            in[i] = dxValue.Float(j);
        }

        for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
        {
            in[i] = dyValue.Float(j);
        }
    }
    else if(instruction.samplerMethod == Fetch)
    {
        // The instruction didn't provide a lod operand, but the sampler's Fetch
        // function requires one to be present. If no lod is supplied, the default
        // is zero.
        in[i] = As<SIMD::Float>(SIMD::Int(0));
        i++;
    }

    if(instruction.offsetId != 0)
    {
        auto offsetValue = Operand(this, state, instruction.offsetId);

        for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
        {
            in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transferred as float.
        }
    }

    if(instruction.sample)
    {
        auto sampleValue = Operand(this, state, instruction.sampleId);
        in[i] = As<SIMD::Float>(sampleValue.Int(0));
    }

    Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);  // sw::Texture*

    Call<ImageSampler>(samplerFunction, texture, &in, &out, state->routine->constants);
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    auto imageId = Object::ID(insn.word(3));
    auto lodId = Object::ID(insn.word(4));

    auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
    GetImageDimensions(state, resultTy, imageId, lodId, dst);

    return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    auto imageId = Object::ID(insn.word(3));
    auto lodId = Object::ID(0);

    auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
    GetImageDimensions(state, resultTy, imageId, lodId, dst);

    return EmitResult::Continue;
}

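// Reads the image dimensions from the descriptor and writes them to `dst`.
// When a lod is given, each dimension is mip-scaled and clamped to at least 1.
// For arrayed images the layer count is appended as the final component.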
void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
    auto routine = state->routine;
    auto &image = getObject(imageId);
    auto &imageType = getType(image);

    ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
    bool isArrayed = imageType.definition.word(5) != 0;
    uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);

    const DescriptorDecorations &d = descriptorDecorations.at(imageId);
    auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

    Pointer<Byte> descriptor = state->getPointer(imageId).base;

    Int width;
    Int height;
    Int depth;

    switch(descriptorType)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
        width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
        height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
        depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
        height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
        depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
        break;
    default:
        UNREACHABLE("Image descriptorType: %d", int(descriptorType));
    }

    if(lodId != 0)
    {
        auto lodVal = Operand(this, state, lodId);
        ASSERT(lodVal.componentCount == 1);
        auto lod = lodVal.Int(0);
        auto one = SIMD::Int(1);

        if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
        if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
        if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
    }
    else
    {
        if(dimensions >= 1) dst.move(0, SIMD::Int(width));
        if(dimensions >= 2) dst.move(1, SIMD::Int(height));
        if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
    }

    if(isArrayed)
    {
        dst.move(dimensions, SIMD::Int(depth));
    }
}

SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    ASSERT(resultTy.componentCount == 1);
    auto imageId = Object::ID(insn.word(3));

    const DescriptorDecorations &d = descriptorDecorations.at(imageId);
    auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

    Pointer<Byte> descriptor = state->getPointer(imageId).base;
    Int mipLevels = 0;
    switch(descriptorType)
    {
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
        break;
    default:
        UNREACHABLE("Image descriptorType: %d", int(descriptorType));
    }

    auto &dst = state->createIntermediate(insn.resultId(), 1);
    dst.move(0, SIMD::Int(mipLevels));

    return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    ASSERT(resultTy.componentCount == 1);
    auto imageId = Object::ID(insn.word(3));
    auto imageTy = getObjectType(imageId);
    ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
    ASSERT(imageTy.definition.word(3) == spv::Dim2D);
    ASSERT(imageTy.definition.word(6 /* MS */) == 1);

    const DescriptorDecorations &d = descriptorDecorations.at(imageId);
    auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

    Pointer<Byte> descriptor = state->getPointer(imageId).base;
    Int sampleCount = 0;
    switch(descriptorType)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
        break;
    default:
        UNREACHABLE("Image descriptorType: %d", int(descriptorType));
    }

    auto &dst = state->createIntermediate(insn.resultId(), 1);
    dst.move(0, SIMD::Int(sampleCount));

    return EmitResult::Continue;
}

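// Computes per-lane addresses of the texels to access:
//   offset = u * texelSize + v * rowPitch + w * slicePitch + sample * samplePitch
// Subpass data accesses are relative to the current window-space position and
// rendering layer. With Nullify out-of-bounds behavior, offending lanes are
// redirected to an offset guaranteed to lie outside the image.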
SIMD::Pointer SpirvShader::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state)
{
    bool isArrayed = instruction.arrayed;
    spv::Dim dim = static_cast<spv::Dim>(instruction.dim);
    int dims = instruction.coordinates - (isArrayed ? 1 : 0);

    SIMD::Int u = coordinate[0];
    SIMD::Int v = SIMD::Int(0);

    if(dims > 1)
    {
        v = coordinate[1];
    }

    if(dim == spv::DimSubpassData)
    {
        u += state->routine->windowSpacePosition[0];
        v += state->routine->windowSpacePosition[1];
    }

    const int texelSize = imageFormat.bytes();
    const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
    auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
                                                              ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
                                                              : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
    auto slicePitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
                                                                ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
                                                                : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
    auto samplePitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
                                                                 ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
                                                                 : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));

    SIMD::Int ptrOffset = u * SIMD::Int(texelSize);

    if(dims > 1)
    {
        ptrOffset += v * rowPitch;
    }

    SIMD::Int w = 0;
    if((dims > 2) || isArrayed)
    {
        if(dims > 2)
        {
            w += coordinate[2];
        }

        if(isArrayed)
        {
            w += coordinate[dims];
        }

        ptrOffset += w * slicePitch;
    }

    if(dim == spv::DimSubpassData)
    {
        // Multiview input attachment access is to the layer corresponding to the current view.
        ptrOffset += SIMD::Int(state->routine->layer) * slicePitch;
    }

    if(instruction.sample)
    {
        ptrOffset += sample * samplePitch;
    }

    // If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
    // Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
    if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
    {
        SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
        SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(u), width));

        if(dims > 1)
        {
            SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
            oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(v), height));
        }

        if((dims > 2) || isArrayed)
        {
            UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
            if(dim == spv::DimCube) { depth *= 6; }
            oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(w), SIMD::UInt(depth)));
        }

        if(instruction.sample)
        {
            SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
            oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
        }

        constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
        static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");

        ptrOffset = (ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
    }

    Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
                                                                        ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
                                                                        : OFFSET(vk::StorageImageDescriptor, ptr)));

    Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));

    return SIMD::Pointer(imageBase, imageSizeInBytes, ptrOffset);
}

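// Emits OpImageRead (also used for input attachment loads): computes the
// texel address, gathers the packed texel data for the active lanes, then
// unpacks it into the result components according to the image format.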
SpirvShader::EmitResult SpirvShader::EmitImageRead(const ImageInstruction &instruction, EmitState *state) const
{
    auto &resultType = getObjectType(instruction.resultId);
    auto &image = getObject(instruction.imageId);
    auto &imageType = getType(image);

    ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
    auto dim = static_cast<spv::Dim>(instruction.dim);

    auto coordinate = Operand(this, state, instruction.coordinateId);
    const DescriptorDecorations &d = descriptorDecorations.at(instruction.imageId);

    // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
    // the renderpass data instead. In all other cases, we can use the format in the instruction.
    vk::Format imageFormat = (dim == spv::DimSubpassData)
                                 ? inputAttachmentFormats[d.InputAttachmentIndex]
                                 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

    // Depth+Stencil image attachments select aspect based on the Sampled Type of the
    // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
    bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
                             getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);

    if(useStencilAspect)
    {
        imageFormat = VK_FORMAT_S8_UINT;
    }

    Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*
    auto &dst = state->createIntermediate(instruction.resultId, resultType.componentCount);

    // VK_EXT_image_robustness requires replacing out-of-bounds access with zero.
    // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
    auto robustness = OutOfBoundsBehavior::Nullify;

    SIMD::Int uvwa[4];
    SIMD::Int sample;

    for(uint32_t i = 0; i < instruction.coordinates; i++)
    {
        uvwa[i] = coordinate.Int(i);
    }

    if(instruction.sample)
    {
        sample = Operand(this, state, instruction.sampleId).Int(0);
    }

    auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

    const int texelSize = imageFormat.bytes();

    // Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
    // TODO(b/160531165): Provide gather abstractions for various element sizes.
    SIMD::Int packed[4];
    if(texelSize == 4 || texelSize == 8 || texelSize == 16)
    {
        for(auto i = 0; i < texelSize / 4; i++)
        {
            packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask());
            texelPtr += sizeof(float);
        }
    }
    else if(texelSize == 2)
    {
        SIMD::Int offsets = texelPtr.offsets();
        SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);

        for(int i = 0; i < SIMD::Width; i++)
        {
            If(Extract(mask, i) != 0)
            {
                packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.base + Extract(offsets, i))), i);
            }
        }
    }
    else if(texelSize == 1)
    {
        SIMD::Int offsets = texelPtr.offsets();
        SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);

        for(int i = 0; i < SIMD::Width; i++)
        {
            If(Extract(mask, i) != 0)
            {
                packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.base + Extract(offsets, i))), i);
            }
        }
    }
    else
        UNREACHABLE("texelSize: %d", int(texelSize));

    // Format support requirements here come from two sources:
    // - Minimum required set of formats for loads from storage images
    // - Any format supported as a color or depth/stencil attachment, for input attachments
    switch(imageFormat)
    {
    case VK_FORMAT_R32G32B32A32_SFLOAT:
    case VK_FORMAT_R32G32B32A32_SINT:
    case VK_FORMAT_R32G32B32A32_UINT:
        dst.move(0, packed[0]);
        dst.move(1, packed[1]);
        dst.move(2, packed[2]);
        dst.move(3, packed[3]);
        break;
    case VK_FORMAT_R32_SINT:
    case VK_FORMAT_R32_UINT:
        dst.move(0, packed[0]);
        // Fill remaining channels with 0,0,1 (of the correct type)
        dst.move(1, SIMD::Int(0));
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R32_SFLOAT:
    case VK_FORMAT_D32_SFLOAT:
    case VK_FORMAT_D32_SFLOAT_S8_UINT:
        dst.move(0, packed[0]);
        // Fill remaining channels with 0,0,1 (of the correct type)
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_D16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16B16A16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        break;
    case VK_FORMAT_R16G16B16A16_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        break;
    case VK_FORMAT_R16G16B16A16_SINT:
        dst.move(0, (packed[0] << 16) >> 16);
        dst.move(1, packed[0] >> 16);
        dst.move(2, (packed[1] << 16) >> 16);
        dst.move(3, packed[1] >> 16);
        break;
    case VK_FORMAT_R16G16B16A16_UINT:
        dst.move(0, packed[0] & SIMD::Int(0xFFFF));
        dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
        dst.move(2, packed[1] & SIMD::Int(0xFFFF));
        dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
        break;
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
        dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
        dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
        dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
        break;
    case VK_FORMAT_R8G8B8A8_SNORM:
    case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
        dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        break;
    case VK_FORMAT_R8G8B8A8_UNORM:
    case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_R8G8B8A8_SRGB:
    case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
        dst.move(0, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(2, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_B8G8R8A8_UNORM:
        dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_B8G8R8A8_SRGB:
        dst.move(0, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(2, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_R8G8B8A8_UINT:
    case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
        dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
        dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
        dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
        break;
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        dst.move(0, (packed[0] << 24) >> 24);
        dst.move(1, (packed[0] << 16) >> 24);
        dst.move(2, (packed[0] << 8) >> 24);
        dst.move(3, packed[0] >> 24);
        break;
    case VK_FORMAT_R8_UNORM:
        dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8_UINT:
    case VK_FORMAT_S8_UINT:
        dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
        dst.move(1, SIMD::UInt(0));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R8_SINT:
        dst.move(0, (packed[0] << 24) >> 24);
        dst.move(1, SIMD::Int(0));
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R8G8_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8G8_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8G8_UINT:
        dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
        dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R8G8_SINT:
        dst.move(0, (packed[0] << 24) >> 24);
        dst.move(1, (packed[0] << 16) >> 24);
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R16_SFLOAT:
        dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16_UINT:
        dst.move(0, packed[0] & SIMD::Int(0xFFFF));
        dst.move(1, SIMD::UInt(0));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R16_SINT:
        dst.move(0, (packed[0] << 16) >> 16);
        dst.move(1, SIMD::Int(0));
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R16G16_SFLOAT:
        dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
        dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16_UINT:
        dst.move(0, packed[0] & SIMD::Int(0xFFFF));
        dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R16G16_SINT:
        dst.move(0, (packed[0] << 16) >> 16);
        dst.move(1, packed[0] >> 16);
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R32G32_SINT:
    case VK_FORMAT_R32G32_UINT:
        dst.move(0, packed[0]);
        dst.move(1, packed[1]);
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R32G32_SFLOAT:
        dst.move(0, packed[0]);
        dst.move(1, packed[1]);
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_A2B10G10R10_UINT_PACK32:
        dst.move(0, packed[0] & SIMD::Int(0x3FF));
        dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
        dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
        dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
        break;
    case VK_FORMAT_A2R10G10B10_UINT_PACK32:
        dst.move(2, packed[0] & SIMD::Int(0x3FF));
        dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
        dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
        dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
        break;
    case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
        dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
        break;
    case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
        break;
    case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_R5G6B5_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_B5G6R5_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
        dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
        break;
    case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
        break;
    case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
        break;
    case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
        dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
        dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
        dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
        dst.move(3, SIMD::Float(1.0f));
        break;
    default:
        UNSUPPORTED("VkFormat %d", int(imageFormat));
        break;
    }

    return EmitResult::Continue;
}

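// Emits OpImageWrite. When the shader declares the image format, the texel is
// packed and stored directly; for Unknown formats the write is delegated to a
// sampling routine which knows the format of the bound image view.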
SpirvShader::EmitResult SpirvShader::EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const
{
    imageWriteEmitted = true;

    auto &image = getObject(instruction.imageId);
    auto &imageType = getType(image);

    ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
    ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData);  // "Its Dim operand must not be SubpassData."

    auto coordinate = Operand(this, state, instruction.coordinateId);
    auto texel = Operand(this, state, instruction.texelId);

    Array<SIMD::Int> coord(5);  // uvwa & sample

    uint32_t i = 0;
    for(; i < instruction.coordinates; i++)
    {
        coord[i] = coordinate.Int(i);
    }

    if(instruction.sample)
    {
        coord[i] = Operand(this, state, instruction.sampleId).Int(0);
    }

    Array<SIMD::Int> texelAndMask(5);
    texelAndMask[0] = texel.Int(0);
    texelAndMask[1] = texel.Int(1);
    texelAndMask[2] = texel.Int(2);
    texelAndMask[3] = texel.Int(3);
    texelAndMask[4] = state->activeStoresAndAtomicsMask();

    Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*

    vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

    if(imageFormat == VK_FORMAT_UNDEFINED)  // spv::ImageFormatUnknown
    {
        Pointer<Byte> samplerFunction = lookupSamplerFunction(descriptor, instruction, state);

        Call<ImageSampler>(samplerFunction, descriptor, &coord, &texelAndMask, state->routine->constants);
    }
    else
    {
        WriteImage(instruction, descriptor, &coord, &texelAndMask, imageFormat);
    }

    return EmitResult::Continue;
}

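// Packs the texel components into the image format's in-memory representation.
// The lane mask passed in texelAndMask[4] selects which lanes perform the store.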
WriteImage(ImageInstructionSignature instruction,Pointer<Byte> descriptor,const Pointer<SIMD::Int> & coord,const Pointer<SIMD::Int> & texelAndMask,vk::Format imageFormat)1192 void SpirvShader::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
1193 {
1194 SIMD::Int texel[4];
1195 texel[0] = texelAndMask[0];
1196 texel[1] = texelAndMask[1];
1197 texel[2] = texelAndMask[2];
1198 texel[3] = texelAndMask[3];
1199 SIMD::Int mask = texelAndMask[4];
1200
1201 SIMD::Int packed[4];
1202 switch(imageFormat)
1203 {
1204 case VK_FORMAT_R32G32B32A32_SFLOAT:
1205 case VK_FORMAT_R32G32B32A32_SINT:
1206 case VK_FORMAT_R32G32B32A32_UINT:
1207 packed[0] = texel[0];
1208 packed[1] = texel[1];
1209 packed[2] = texel[2];
1210 packed[3] = texel[3];
1211 break;
1212 case VK_FORMAT_R32_SFLOAT:
1213 case VK_FORMAT_R32_SINT:
1214 case VK_FORMAT_R32_UINT:
1215 packed[0] = texel[0];
1216 break;
1217 case VK_FORMAT_R8G8B8A8_UNORM:
1218 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1219 packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1220 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1221 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1222 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1223 break;
1224 case VK_FORMAT_B8G8R8A8_UNORM:
1225 packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1226 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1227 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1228 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1229 break;
1230 case VK_FORMAT_B8G8R8A8_SRGB:
1231 packed[0] = (SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[2])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1232 ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[1])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1233 ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[0])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1234 ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[3])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1235 break;
1236 case VK_FORMAT_R8G8B8A8_SNORM:
1237 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
1238 packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1239 SIMD::Int(0xFF)) |
1240 ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1241 SIMD::Int(0xFF))
1242 << 8) |
1243 ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1244 SIMD::Int(0xFF))
1245 << 16) |
1246 ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1247 SIMD::Int(0xFF))
1248 << 24);
1249 break;
1250 case VK_FORMAT_R8G8B8A8_SINT:
1251 case VK_FORMAT_R8G8B8A8_UINT:
1252 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1253 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1254 packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xff))) |
1255 (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xff)) << 8) |
1256 (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xff)) << 16) |
1257 (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xff)) << 24);
1258 break;
1259 case VK_FORMAT_R16G16B16A16_SFLOAT:
1260 packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
1261 packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
1262 break;
1263 case VK_FORMAT_R16G16B16A16_SINT:
1264 case VK_FORMAT_R16G16B16A16_UINT:
1265 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
1266 packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
1267 break;
1268 case VK_FORMAT_R32G32_SFLOAT:
1269 case VK_FORMAT_R32G32_SINT:
1270 case VK_FORMAT_R32G32_UINT:
1271 packed[0] = texel[0];
1272 packed[1] = texel[1];
1273 break;
1274 case VK_FORMAT_R16G16_SFLOAT:
1275 packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
1276 break;
1277 case VK_FORMAT_R16G16_SINT:
1278 case VK_FORMAT_R16G16_UINT:
1279 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
1280 break;
1281 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1282 // Truncates instead of rounding. See b/147900455
1283 packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
1284 ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
1285 ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
1286 break;
1287 case VK_FORMAT_R16_SFLOAT:
1288 packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
1289 break;
1290 case VK_FORMAT_R16G16B16A16_UNORM:
1291 packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1292 (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1293 packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1294 (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1295 break;
1296 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
			packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
			            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
			            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
			            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
			break;
		case VK_FORMAT_R16G16_UNORM:
			packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
			            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
			break;
		case VK_FORMAT_R8G8_UNORM:
			packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
			            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
			break;
		case VK_FORMAT_R16_UNORM:
			packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
			break;
		case VK_FORMAT_R8_UNORM:
			packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
			break;
		case VK_FORMAT_R16G16B16A16_SNORM:
			packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
			            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
			packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
			            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
			break;
		case VK_FORMAT_R16G16_SNORM:
			packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
			            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
			break;
		case VK_FORMAT_R8G8_SNORM:
			packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
			            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
			break;
		case VK_FORMAT_R16_SNORM:
			packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
			break;
		case VK_FORMAT_R8_SNORM:
			packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
			break;
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8_UINT:
			packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
			break;
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16_UINT:
			packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
			break;
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8_UINT:
			packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
			break;
		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
			packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
			            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
			            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
			            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
			break;
		default:
			UNSUPPORTED("VkFormat %d", int(imageFormat));
			break;
	}

	// "The integer texel coordinates are validated according to the same rules as for texel input coordinate
	// validation. If the texel fails integer texel coordinate validation, then the write has no effect."
	// - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
	auto robustness = OutOfBoundsBehavior::Nullify;
	// GetTexelAddress() only needs the EmitState* for SubpassData accesses (i.e. input attachments).
	const EmitState *state = nullptr;

	SIMD::Int uvwa[4];
	SIMD::Int sample;

	uint32_t i = 0;
	for(; i < instruction.coordinates; i++)
	{
		uvwa[i] = As<SIMD::Int>(coord[i]);
	}

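	// When present, the sample index immediately follows the coordinate components in the operand list.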
	if(instruction.sample)
	{
		sample = As<SIMD::Int>(coord[i]);
	}

	auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

	const int texelSize = imageFormat.bytes();

	// Scatter packed texel data.
	// TODO(b/160531165): Provide scatter abstractions for various element sizes.
	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
	{
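		// Store the packed texel one 32-bit word at a time, advancing the pointer by 4 bytes
		// per word. The execution mask ensures only active lanes write.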
		for(auto i = 0; i < texelSize / 4; i++)
		{
			texelPtr.Store(packed[i], robustness, mask);
			texelPtr += sizeof(float);
		}
	}
	else if(texelSize == 2)
	{
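		// There is no 16-bit scatter abstraction yet (see TODO above), so emit a conditional
		// Short store per SIMD lane, skipping lanes that are masked off or out of bounds.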
		SIMD::Int offsets = texelPtr.offsets();
		mask = mask & texelPtr.isInBounds(2, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				*Pointer<Short>(texelPtr.base + Extract(offsets, i)) = Short(Extract(packed[0], i));
			}
		}
	}
	else if(texelSize == 1)
	{
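		// Same per-lane fallback, storing a single byte per active lane.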
		SIMD::Int offsets = texelPtr.offsets();
		mask = mask & texelPtr.isInBounds(1, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				*Pointer<Byte>(texelPtr.base + Extract(offsets, i)) = Byte(Extract(packed[0], i));
			}
		}
	}
	else
		UNREACHABLE("texelSize: %d", int(texelSize));
}

SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const
{
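	// OpImageTexelPointer forms a pointer to a single texel of an image; SPIR-V restricts
	// the use of such pointers to atomic operations.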
	auto coordinate = Operand(this, state, instruction.coordinateId);

	Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*

	// VK_EXT_image_robustness requires checking for out-of-bounds accesses.
	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
	auto robustness = OutOfBoundsBehavior::Nullify;
	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

	SIMD::Int uvwa[4];

	for(uint32_t i = 0; i < instruction.coordinates; i++)
	{
		uvwa[i] = coordinate.Int(i);
	}

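	// OpImageTexelPointer carries an explicit Sample operand (0 for non-multisampled images).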
	SIMD::Int sample = Operand(this, state, instruction.sampleId).Int(0);

	auto ptr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

	state->createPointer(instruction.resultId, ptr);

	return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
{
	// Propagate the image pointer in both cases (OpSampledImage combining an image with a
	// sampler, and OpImage extracting the image back out).
	// Consumers of OpSampledImage will look through to find the sampler pointer.

	Object::ID resultId = insn.word(2);
	Object::ID imageId = insn.word(3);

	state->createPointer(resultId, state->getPointer(imageId));

	return EmitResult::Continue;
}

}  // namespace sw