// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include "System/Types.hpp"

#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkPipelineLayout.hpp"

#include <spirv/unified1/spirv.hpp>

namespace {

vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
{
    switch(format)
    {
    case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
    case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
    case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
    case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
    case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
    case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
    case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
    case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
    case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
    case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
    case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
    case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
    case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
    case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
    case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
    case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
    case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
    case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
    case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
    case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
    case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
    case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
    case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
    case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
    case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
    case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
    case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
    case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
    case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
    case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
    case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
    case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
    case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
    case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
    case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
    case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
    case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
    case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
    case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
    case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;

    default:
        UNSUPPORTED("SPIR-V ImageFormat %u", format);
        return VK_FORMAT_UNDEFINED;
    }
}

sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
{
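    // Piecewise sRGB EOTF:
    //   linear = c / 12.92                   for c < 0.04045
    //   linear = ((c + 0.055) / 1.055)^2.4   otherwise
    // Both branches are computed for all lanes; the comparison below selects per lane.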
    sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
    sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));

    sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));

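    // CmpLT() yields all-ones (-1) per lane where c < 0.04045, enabling a branchless
    // bitwise select between the linear and exponential segments.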
    return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));  // TODO: IfThenElse()
}

}  // anonymous namespace

namespace sw {

SpirvShader::ImageInstruction::ImageInstruction(InsnIterator insn, const SpirvShader &spirv)
    : ImageInstructionSignature(parseVariantAndMethod(insn))
    , position(insn.distanceFrom(spirv.begin()))
{
    if(samplerMethod == Write)
    {
        imageId = insn.word(1);
        coordinateId = insn.word(2);
        texelId = insn.word(3);
    }
    else
    {
        resultTypeId = insn.resultTypeId();  // word(1)
        resultId = insn.resultId();          // word(2)

        if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer)  // Samplerless
        {
            imageId = insn.word(3);
        }
        else
        {
            Object::ID sampledImageId = insn.word(3);
            const Object &sampledImage = spirv.getObject(sampledImageId);

            if(sampledImage.opcode() == spv::OpSampledImage)
            {
                imageId = sampledImage.definition.word(3);
                samplerId = sampledImage.definition.word(4);
            }
            else  // Combined image/sampler
            {
                imageId = sampledImageId;
                samplerId = sampledImageId;
            }
        }

        coordinateId = insn.word(4);
    }

    // `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
    // To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
    const Type &imageObjectType = spirv.getObjectType(imageId);
    const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
                                         ? spirv.getType(imageObjectType.definition.word(2))
                                         : imageObjectType;
    const Type &imageType = ((imageReferenceType.opcode() == spv::OpTypePointer)
                                 ? spirv.getType(imageReferenceType.element)
                                 : imageReferenceType);

    ASSERT(imageType.opcode() == spv::OpTypeImage);
    dim = imageType.definition.word(3);
    arrayed = imageType.definition.word(5);
    imageFormat = imageType.definition.word(8);

    const Object &coordinateObject = spirv.getObject(coordinateId);
    const Type &coordinateType = spirv.getType(coordinateObject);
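    // For projective variants the last coordinate component is the q divisor, not a coordinate.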
    coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);

    if(samplerMethod == TexelPointer)
    {
        sampleId = insn.word(5);
        sample = !spirv.getObject(sampleId).isConstantZero();
    }

    if(isDref())
    {
        drefId = insn.word(5);
    }

    if(samplerMethod == Gather)
    {
        gatherComponent = !isDref() ? spirv.getObject(insn.word(5)).constantValue[0] : 0;
    }

    uint32_t operandsIndex = getImageOperandsIndex(insn);
    uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0;  // The mask which indicates which operands are provided.

    operandsIndex += 1;  // Advance to the first actual operand <id> location.

    if(imageOperands & spv::ImageOperandsBiasMask)
    {
        ASSERT(samplerMethod == Bias);
        lodOrBiasId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsBiasMask;
    }

    if(imageOperands & spv::ImageOperandsLodMask)
    {
        ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
        lodOrBiasId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsLodMask;
    }

    if(imageOperands & spv::ImageOperandsGradMask)
    {
        ASSERT(samplerMethod == Grad);
        gradDxId = insn.word(operandsIndex + 0);
        gradDyId = insn.word(operandsIndex + 1);
        operandsIndex += 2;
        imageOperands &= ~spv::ImageOperandsGradMask;

        grad = spirv.getObjectType(gradDxId).componentCount;
    }

    if(imageOperands & spv::ImageOperandsConstOffsetMask)
    {
        offsetId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsConstOffsetMask;

        offset = spirv.getObjectType(offsetId).componentCount;
    }

    if(imageOperands & spv::ImageOperandsSampleMask)
    {
        ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
        sampleId = insn.word(operandsIndex);
        operandsIndex += 1;
        imageOperands &= ~spv::ImageOperandsSampleMask;

        sample = !spirv.getObject(sampleId).isConstantZero();
    }

    // TODO(b/174475384)
    if(imageOperands & spv::ImageOperandsZeroExtendMask)
    {
        ASSERT(samplerMethod == Read || samplerMethod == Write);
        imageOperands &= ~spv::ImageOperandsZeroExtendMask;
    }
    else if(imageOperands & spv::ImageOperandsSignExtendMask)
    {
        ASSERT(samplerMethod == Read || samplerMethod == Write);
        imageOperands &= ~spv::ImageOperandsSignExtendMask;
    }

    if(imageOperands != 0)
    {
        UNSUPPORTED("Image operands 0x%08X", imageOperands);
    }
}

SpirvShader::ImageInstructionSignature SpirvShader::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
{
    uint32_t imageOperands = getImageOperandsMask(insn);
    bool bias = imageOperands & spv::ImageOperandsBiasMask;
    bool grad = imageOperands & spv::ImageOperandsGradMask;

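    // Bias and Grad arrive as image operands rather than as distinct opcodes, so the
    // sampling method is refined here using the operand mask.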
    switch(insn.opcode())
    {
    case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
    case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
    case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
    case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
    case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
    case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
    case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
    case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
    case spv::OpImageGather: return { None, Gather };
    case spv::OpImageDrefGather: return { Dref, Gather };
    case spv::OpImageFetch: return { None, Fetch };
    case spv::OpImageQueryLod: return { None, Query };
    case spv::OpImageRead: return { None, Read };
    case spv::OpImageWrite: return { None, Write };
    case spv::OpImageTexelPointer: return { None, TexelPointer };

    default:
        ASSERT(false);
        return { None, Implicit };
    }
}

// Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
uint32_t SpirvShader::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
{
    switch(insn.opcode())
    {
    case spv::OpImageSampleImplicitLod:
    case spv::OpImageSampleProjImplicitLod:
        return insn.wordCount() > 5 ? 5 : 0;  // Optional
    case spv::OpImageSampleExplicitLod:
    case spv::OpImageSampleProjExplicitLod:
        return 5;  // "Either Lod or Grad image operands must be present."
    case spv::OpImageSampleDrefImplicitLod:
    case spv::OpImageSampleProjDrefImplicitLod:
        return insn.wordCount() > 6 ? 6 : 0;  // Optional
    case spv::OpImageSampleDrefExplicitLod:
    case spv::OpImageSampleProjDrefExplicitLod:
        return 6;  // "Either Lod or Grad image operands must be present."
    case spv::OpImageGather:
    case spv::OpImageDrefGather:
        return insn.wordCount() > 6 ? 6 : 0;  // Optional
    case spv::OpImageFetch:
        return insn.wordCount() > 5 ? 5 : 0;  // Optional
    case spv::OpImageQueryLod:
        ASSERT(insn.wordCount() == 5);
        return 0;  // No image operands.
    case spv::OpImageRead:
        return insn.wordCount() > 5 ? 5 : 0;  // Optional
    case spv::OpImageWrite:
        return insn.wordCount() > 4 ? 4 : 0;  // Optional
    case spv::OpImageTexelPointer:
        ASSERT(insn.wordCount() == 6);
        return 0;  // No image operands.

    default:
        ASSERT(false);
        return 0;
    }
}

uint32_t SpirvShader::ImageInstruction::getImageOperandsMask(InsnIterator insn)
{
    uint32_t operandsIndex = getImageOperandsIndex(insn);
    return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
}

SpirvShader::EmitResult SpirvShader::EmitImageSample(const ImageInstruction &instruction, EmitState *state) const
{
    auto &resultType = getType(instruction.resultTypeId);
    auto &result = state->createIntermediate(instruction.resultId, resultType.componentCount);
    Array<SIMD::Float> out(4);

    // TODO(b/153380916): When we're in a code path that is always executed,
    // i.e. post-dominators of the entry block, we don't have to dynamically
    // check whether any lanes are active, and can elide the jump.
    If(AnyTrue(state->activeLaneMask()))
    {
        EmitImageSampleUnconditional(out, instruction, state);
    }

    for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }

    return EmitResult::Continue;
}

void SpirvShader::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction, EmitState *state) const
{
    Pointer<Byte> imageDescriptor = state->getPointer(instruction.imageId).base;  // vk::SampledImageDescriptor*

    Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, instruction, state);

    callSamplerFunction(samplerFunction, out, imageDescriptor, instruction, state);
}

Pointer<Byte> SpirvShader::lookupSamplerFunction(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
    Int samplerId = 0;

    if(instruction.samplerId != 0)
    {
        Pointer<Byte> samplerDescriptor = state->getPointer(instruction.samplerId).base;  // vk::SampledImageDescriptor*

        samplerId = *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId));  // vk::Sampler::id
    }

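    // Each image instruction has its own one-entry cache of the specialized sampling
    // routine, keyed on the image descriptor and sampler identity, so getImageSampler
    // is only re-invoked when the bound descriptor actually changes.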
    auto &cache = state->routine->samplerCache.at(instruction.position);
    Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId);  // TODO(b/205566405): Skip sampler ID check for samplerless instructions.

    If(!cacheHit)
    {
        rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
        cache.function = Call(getImageSampler, state->routine->device, instruction.signature, samplerId, imageViewId);
        cache.imageDescriptor = imageDescriptor;
        cache.samplerId = samplerId;
    }

    return cache.function;
}

void SpirvShader::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, EmitState *state) const
{
    Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.
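    // Input parameters are packed in a fixed order: coordinates, then (when present)
    // the depth-comparison reference, lod/bias or gradients, constant offsets, and
    // finally the sample index.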

    auto coordinate = Operand(this, state, instruction.coordinateId);

    uint32_t i = 0;
    for(; i < instruction.coordinates; i++)
    {
        if(instruction.isProj())
        {
            in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
        }
        else
        {
            in[i] = coordinate.Float(i);
        }
    }

    if(instruction.isDref())
    {
        auto drefValue = Operand(this, state, instruction.drefId);

        if(instruction.isProj())
        {
            in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
        }
        else
        {
            in[i] = drefValue.Float(0);
        }

        i++;
    }

    if(instruction.lodOrBiasId != 0)
    {
        auto lodValue = Operand(this, state, instruction.lodOrBiasId);
        in[i] = lodValue.Float(0);
        i++;
    }
    else if(instruction.gradDxId != 0)
    {
        auto dxValue = Operand(this, state, instruction.gradDxId);
        auto dyValue = Operand(this, state, instruction.gradDyId);
        ASSERT(dxValue.componentCount == dyValue.componentCount);

        for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
        {
            in[i] = dxValue.Float(j);
        }

        for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
        {
            in[i] = dyValue.Float(j);
        }
    }
    else if(instruction.samplerMethod == Fetch)
    {
        // The instruction didn't provide a lod operand, but the sampler's Fetch
        // function requires one to be present. If no lod is supplied, the default
        // is zero.
        in[i] = As<SIMD::Float>(SIMD::Int(0));
        i++;
    }

    if(instruction.offsetId != 0)
    {
        auto offsetValue = Operand(this, state, instruction.offsetId);

        for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
        {
            in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transferred as float.
        }
    }

    if(instruction.sample)
    {
        auto sampleValue = Operand(this, state, instruction.sampleId);
        in[i] = As<SIMD::Float>(sampleValue.Int(0));
    }

    Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);  // sw::Texture*

    Call<ImageSampler>(samplerFunction, texture, &in, &out, state->routine->constants);
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    auto imageId = Object::ID(insn.word(3));
    auto lodId = Object::ID(insn.word(4));

    auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
    GetImageDimensions(state, resultTy, imageId, lodId, dst);

    return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    auto imageId = Object::ID(insn.word(3));
    auto lodId = Object::ID(0);

    auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
    GetImageDimensions(state, resultTy, imageId, lodId, dst);

    return EmitResult::Continue;
}

void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
    auto routine = state->routine;
    auto &image = getObject(imageId);
    auto &imageType = getType(image);

    ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
    bool isArrayed = imageType.definition.word(5) != 0;
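    // For arrayed images the last result component is the layer count; exclude it from
    // the spatial dimensions here and append it below.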
    uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);

    const DescriptorDecorations &d = descriptorDecorations.at(imageId);
    auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

    Pointer<Byte> descriptor = state->getPointer(imageId).base;

    Int width;
    Int height;
    Int depth;

    switch(descriptorType)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
        width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
        height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
        depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
        height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
        depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
        break;
    default:
        UNREACHABLE("Image descriptorType: %d", int(descriptorType));
    }

    if(lodId != 0)
    {
        auto lodVal = Operand(this, state, lodId);
        ASSERT(lodVal.componentCount == 1);
        auto lod = lodVal.Int(0);
        auto one = SIMD::Int(1);

        if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
        if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
        if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
    }
    else
    {
        if(dimensions >= 1) dst.move(0, SIMD::Int(width));
        if(dimensions >= 2) dst.move(1, SIMD::Int(height));
        if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
    }

    if(isArrayed)
    {
        dst.move(dimensions, SIMD::Int(depth));
    }
}

SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    ASSERT(resultTy.componentCount == 1);
    auto imageId = Object::ID(insn.word(3));

    const DescriptorDecorations &d = descriptorDecorations.at(imageId);
    auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

    Pointer<Byte> descriptor = state->getPointer(imageId).base;
    Int mipLevels = 0;
    switch(descriptorType)
    {
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
        break;
    default:
        UNREACHABLE("Image descriptorType: %d", int(descriptorType));
    }

    auto &dst = state->createIntermediate(insn.resultId(), 1);
    dst.move(0, SIMD::Int(mipLevels));

    return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
{
    auto &resultTy = getType(insn.resultTypeId());
    ASSERT(resultTy.componentCount == 1);
    auto imageId = Object::ID(insn.word(3));
    auto imageTy = getObjectType(imageId);
    ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
    ASSERT(imageTy.definition.word(3) == spv::Dim2D);
    ASSERT(imageTy.definition.word(6 /* MS */) == 1);

    const DescriptorDecorations &d = descriptorDecorations.at(imageId);
    auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

    Pointer<Byte> descriptor = state->getPointer(imageId).base;
    Int sampleCount = 0;
    switch(descriptorType)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
        break;
    default:
        UNREACHABLE("Image descriptorType: %d", int(descriptorType));
    }

    auto &dst = state->createIntermediate(insn.resultId(), 1);
    dst.move(0, SIMD::Int(sampleCount));

    return EmitResult::Continue;
}

SIMD::Pointer SpirvShader::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const EmitState *state)
{
    bool isArrayed = instruction.arrayed;
    spv::Dim dim = static_cast<spv::Dim>(instruction.dim);
    int dims = instruction.coordinates - (isArrayed ? 1 : 0);

    SIMD::Int u = coordinate[0];
    SIMD::Int v = SIMD::Int(0);

    if(dims > 1)
    {
        v = coordinate[1];
    }

    if(dim == spv::DimSubpassData)
    {
        u += state->routine->windowSpacePosition[0];
        v += state->routine->windowSpacePosition[1];
    }

    const int texelSize = imageFormat.bytes();
    const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
    auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
                                                              ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
                                                              : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
    auto slicePitch = SIMD::Int(
        *Pointer<Int>(descriptor + (useStencilAspect
                                        ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
                                        : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
    auto samplePitch = SIMD::Int(
        *Pointer<Int>(descriptor + (useStencilAspect
                                        ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
                                        : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));

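    // Per-lane byte offset of the addressed texel:
    //   ptrOffset = u * texelSize + v * rowPitch + (w + layer) * slicePitch + sample * samplePitch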
    SIMD::Int ptrOffset = u * SIMD::Int(texelSize);

    if(dims > 1)
    {
        ptrOffset += v * rowPitch;
    }

    SIMD::Int w = 0;
    if((dims > 2) || isArrayed)
    {
        if(dims > 2)
        {
            w += coordinate[2];
        }

        if(isArrayed)
        {
            w += coordinate[dims];
        }

        ptrOffset += w * slicePitch;
    }

    if(dim == spv::DimSubpassData)
    {
        // Multiview input attachment access is to the layer corresponding to the current view.
        ptrOffset += SIMD::Int(state->routine->viewID) * slicePitch;
    }

    if(instruction.sample)
    {
        ptrOffset += sample * samplePitch;
    }

    // If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
    // Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
    if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
    {
        SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
        SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(u), width));

        if(dims > 1)
        {
            SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
            oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(v), height));
        }

        if((dims > 2) || isArrayed)
        {
            UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
            if(dim == spv::DimCube) { depth *= 6; }
            oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(w), SIMD::UInt(depth)));
        }

        if(instruction.sample)
        {
            SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
            oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
        }

        constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
        static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");

        ptrOffset = (ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
    }

    Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
                                                                        ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
                                                                        : OFFSET(vk::StorageImageDescriptor, ptr)));

    Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));

    return SIMD::Pointer(imageBase, imageSizeInBytes, ptrOffset);
}

SpirvShader::EmitResult SpirvShader::EmitImageRead(const ImageInstruction &instruction, EmitState *state) const
{
    auto &resultType = getObjectType(instruction.resultId);
    auto &image = getObject(instruction.imageId);
    auto &imageType = getType(image);

    ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
    auto dim = static_cast<spv::Dim>(instruction.dim);

    auto coordinate = Operand(this, state, instruction.coordinateId);
    const DescriptorDecorations &d = descriptorDecorations.at(instruction.imageId);

    // For subpass data, the format in the instruction is spv::ImageFormatUnknown. Get it from
    // the renderpass data instead. In all other cases, we can use the format in the instruction.
    vk::Format imageFormat = (dim == spv::DimSubpassData)
                                 ? inputAttachmentFormats[d.InputAttachmentIndex]
                                 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

    // Depth+stencil image attachments select the aspect based on the Sampled Type of the
    // OpTypeImage. If float, we want the depth aspect. If int, we want the stencil aspect.
    bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
                             getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);

    if(useStencilAspect)
    {
        imageFormat = VK_FORMAT_S8_UINT;
    }

    Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*
    auto &dst = state->createIntermediate(instruction.resultId, resultType.componentCount);

    // VK_EXT_image_robustness requires replacing out-of-bounds access with zero.
    // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
    auto robustness = OutOfBoundsBehavior::Nullify;

    SIMD::Int uvwa[4];
    SIMD::Int sample;

    for(uint32_t i = 0; i < instruction.coordinates; i++)
    {
        uvwa[i] = coordinate.Int(i);
    }

    if(instruction.sample)
    {
        sample = Operand(this, state, instruction.sampleId).Int(0);
    }

    auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

    const int texelSize = imageFormat.bytes();

    // Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
    // TODO(b/160531165): Provide gather abstractions for various element sizes.
    SIMD::Int packed[4];
    if(texelSize == 4 || texelSize == 8 || texelSize == 16)
    {
        for(auto i = 0; i < texelSize / 4; i++)
        {
            packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask());
            texelPtr += sizeof(float);
        }
    }
    else if(texelSize == 2)
    {
        SIMD::Int offsets = texelPtr.offsets();
        SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);

        for(int i = 0; i < SIMD::Width; i++)
        {
            If(Extract(mask, i) != 0)
            {
                packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.base + Extract(offsets, i))), i);
            }
        }
    }
    else if(texelSize == 1)
    {
        SIMD::Int offsets = texelPtr.offsets();
        SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);

        for(int i = 0; i < SIMD::Width; i++)
        {
            If(Extract(mask, i) != 0)
            {
                packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.base + Extract(offsets, i))), i);
            }
        }
    }
    else
        UNREACHABLE("texelSize: %d", int(texelSize));

    // Format support requirements here come from two sources:
    // - Minimum required set of formats for loads from storage images
    // - Any format supported as a color or depth/stencil attachment, for input attachments
    switch(imageFormat)
    {
    case VK_FORMAT_R32G32B32A32_SFLOAT:
    case VK_FORMAT_R32G32B32A32_SINT:
    case VK_FORMAT_R32G32B32A32_UINT:
        dst.move(0, packed[0]);
        dst.move(1, packed[1]);
        dst.move(2, packed[2]);
        dst.move(3, packed[3]);
        break;
    case VK_FORMAT_R32_SINT:
    case VK_FORMAT_R32_UINT:
        dst.move(0, packed[0]);
        // Fill remaining channels with 0,0,1 (of the correct type)
        dst.move(1, SIMD::Int(0));
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R32_SFLOAT:
    case VK_FORMAT_D32_SFLOAT:
    case VK_FORMAT_D32_SFLOAT_S8_UINT:
        dst.move(0, packed[0]);
        // Fill remaining channels with 0,0,1 (of the correct type)
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_D16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16B16A16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        break;
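        // For SNORM formats, shifting each component into the high bits makes the signed
        // integer interpretation carry the sign; scaling by 1.0/0x7FFF0000 normalizes it,
        // and Max() clamps the most negative code (-0x8000) to exactly -1.0.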
    case VK_FORMAT_R16G16B16A16_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        break;
    case VK_FORMAT_R16G16B16A16_SINT:
        dst.move(0, (packed[0] << 16) >> 16);
        dst.move(1, packed[0] >> 16);
        dst.move(2, (packed[1] << 16) >> 16);
        dst.move(3, packed[1] >> 16);
        break;
    case VK_FORMAT_R16G16B16A16_UINT:
        dst.move(0, packed[0] & SIMD::Int(0xFFFF));
        dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
        dst.move(2, packed[1] & SIMD::Int(0xFFFF));
        dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
        break;
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
        dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
        dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
        dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
        break;
    case VK_FORMAT_R8G8B8A8_SNORM:
    case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
        dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        break;
    case VK_FORMAT_R8G8B8A8_UNORM:
    case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_R8G8B8A8_SRGB:
    case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
        dst.move(0, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_B8G8R8A8_UNORM:
        dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_B8G8R8A8_SRGB:
        dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(2, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
        dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        break;
    case VK_FORMAT_R8G8B8A8_UINT:
    case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
        dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
        dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
        dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
        break;
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        dst.move(0, (packed[0] << 24) >> 24);
        dst.move(1, (packed[0] << 16) >> 24);
        dst.move(2, (packed[0] << 8) >> 24);
        dst.move(3, packed[0] >> 24);
        break;
    case VK_FORMAT_R8_UNORM:
        dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8_UINT:
    case VK_FORMAT_S8_UINT:
        dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
        dst.move(1, SIMD::UInt(0));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R8_SINT:
        dst.move(0, (packed[0] << 24) >> 24);
        dst.move(1, SIMD::Int(0));
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R8G8_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8G8_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R8G8_UINT:
        dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
        dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R8G8_SINT:
        dst.move(0, (packed[0] << 24) >> 24);
        dst.move(1, (packed[0] << 16) >> 24);
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R16_SFLOAT:
        dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(1, SIMD::Float(0.0f));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16_UINT:
        dst.move(0, packed[0] & SIMD::Int(0xFFFF));
        dst.move(1, SIMD::UInt(0));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R16_SINT:
        dst.move(0, (packed[0] << 16) >> 16);
        dst.move(1, SIMD::Int(0));
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R16G16_SFLOAT:
        dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
        dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16_UNORM:
        dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16_SNORM:
        dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R16G16_UINT:
        dst.move(0, packed[0] & SIMD::Int(0xFFFF));
        dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
        dst.move(2, SIMD::UInt(0));
        dst.move(3, SIMD::UInt(1));
        break;
    case VK_FORMAT_R16G16_SINT:
        dst.move(0, (packed[0] << 16) >> 16);
        dst.move(1, packed[0] >> 16);
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R32G32_SINT:
    case VK_FORMAT_R32G32_UINT:
        dst.move(0, packed[0]);
        dst.move(1, packed[1]);
        dst.move(2, SIMD::Int(0));
        dst.move(3, SIMD::Int(1));
        break;
    case VK_FORMAT_R32G32_SFLOAT:
        dst.move(0, packed[0]);
        dst.move(1, packed[1]);
        dst.move(2, SIMD::Float(0.0f));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_A2B10G10R10_UINT_PACK32:
        dst.move(0, packed[0] & SIMD::Int(0x3FF));
        dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
        dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
        dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
        break;
    case VK_FORMAT_A2R10G10B10_UINT_PACK32:
        dst.move(2, packed[0] & SIMD::Int(0x3FF));
        dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
        dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
        dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
        break;
    case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
        dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
        break;
    case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
        dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
        break;
    case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
        dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
        dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
        break;
    case VK_FORMAT_R5G6B5_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_B5G6R5_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
        dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float(1.0f));
        break;
    case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
        break;
    case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
        break;
    case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
        dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
        dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
        break;
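        // The unsigned 11- and 10-bit float components use the same exponent bias as
        // half-precision floats, so shifting them into the half-float bit layout lets
        // halfToFloatBits() decode them directly; the missing mantissa bits are zero.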
    case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
        dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
        dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
        dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
        dst.move(3, SIMD::Float(1.0f));
        break;
    default:
        UNSUPPORTED("VkFormat %d", int(imageFormat));
        break;
    }

    return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitImageWrite(const ImageInstruction &instruction, EmitState *state) const
{
    imageWriteEmitted = true;

    auto &image = getObject(instruction.imageId);
    auto &imageType = getType(image);

    ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
    ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData);  // "Its Dim operand must not be SubpassData."

    auto coordinate = Operand(this, state, instruction.coordinateId);
    auto texel = Operand(this, state, instruction.texelId);

    Array<SIMD::Int> coord(5);  // uvwa & sample

    uint32_t i = 0;
    for(; i < instruction.coordinates; i++)
    {
        coord[i] = coordinate.Int(i);
    }

    if(instruction.sample)
    {
        coord[i] = Operand(this, state, instruction.sampleId).Int(0);
    }
    Array<SIMD::Int> texelAndMask(5);
    texelAndMask[0] = texel.Int(0);
    texelAndMask[1] = texel.Int(1);
    texelAndMask[2] = texel.Int(2);
    texelAndMask[3] = texel.Int(3);
    texelAndMask[4] = state->activeStoresAndAtomicsMask();

    Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*

    vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

    if(imageFormat == VK_FORMAT_UNDEFINED)  // spv::ImageFormatUnknown
    {
        Pointer<Byte> samplerFunction = lookupSamplerFunction(descriptor, instruction, state);

        Call<ImageSampler>(samplerFunction, descriptor, &coord, &texelAndMask, state->routine->constants);
    }
    else
    {
        WriteImage(instruction, descriptor, &coord, &texelAndMask, imageFormat);
    }

    return EmitResult::Continue;
}

void SpirvShader::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
{
    SIMD::Int texel[4];
    texel[0] = texelAndMask[0];
    texel[1] = texelAndMask[1];
    texel[2] = texelAndMask[2];
    texel[3] = texelAndMask[3];
    SIMD::Int mask = texelAndMask[4];

    SIMD::Int packed[4];
    switch(imageFormat)
    {
    case VK_FORMAT_R32G32B32A32_SFLOAT:
    case VK_FORMAT_R32G32B32A32_SINT:
    case VK_FORMAT_R32G32B32A32_UINT:
        packed[0] = texel[0];
        packed[1] = texel[1];
        packed[2] = texel[2];
        packed[3] = texel[3];
        break;
    case VK_FORMAT_R32_SFLOAT:
    case VK_FORMAT_R32_SINT:
    case VK_FORMAT_R32_UINT:
        packed[0] = texel[0];
        break;
    case VK_FORMAT_R8G8B8A8_UNORM:
    case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
        packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
        break;
    case VK_FORMAT_B8G8R8A8_UNORM:
        packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
        break;
    case VK_FORMAT_R8G8B8A8_SNORM:
    case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
        packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & SIMD::Int(0xFF)) |
                    ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & SIMD::Int(0xFF)) << 8) |
                    ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & SIMD::Int(0xFF)) << 16) |
                    ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & SIMD::Int(0xFF)) << 24);
        break;
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_R8G8B8A8_UINT:
    case VK_FORMAT_A8B8G8R8_SINT_PACK32:
    case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF))) |
                    (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8) |
                    (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFF)) << 16) |
                    (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFF)) << 24);
        break;
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
        packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
        break;
    case VK_FORMAT_R16G16B16A16_SINT:
    case VK_FORMAT_R16G16B16A16_UINT:
        packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
        packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
        break;
    case VK_FORMAT_R32G32_SFLOAT:
    case VK_FORMAT_R32G32_SINT:
    case VK_FORMAT_R32G32_UINT:
        packed[0] = texel[0];
        packed[1] = texel[1];
        break;
    case VK_FORMAT_R16G16_SFLOAT:
        packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
        break;
    case VK_FORMAT_R16G16_SINT:
    case VK_FORMAT_R16G16_UINT:
        packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
        break;
    case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
        // Truncates instead of rounding. See b/147900455
        packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
                    ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
                    ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
        break;
    case VK_FORMAT_R16_SFLOAT:
        packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
        break;
    case VK_FORMAT_R16G16B16A16_UNORM:
        packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
                    (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
        packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
                    (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
        break;
    case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
        packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
                    ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
        break;
    case VK_FORMAT_R16G16_UNORM:
        packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
                    (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
        break;
    case VK_FORMAT_R8G8_UNORM:
        packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
                    (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
        break;
    case VK_FORMAT_R16_UNORM:
        packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
        break;
    case VK_FORMAT_R8_UNORM:
        packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
        break;
    case VK_FORMAT_R16G16B16A16_SNORM:
        packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
                    (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
        packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
                    (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
        break;
    case VK_FORMAT_R16G16_SNORM:
        packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
                    (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
        break;
    case VK_FORMAT_R8G8_SNORM:
        packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
                    (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
        break;
    case VK_FORMAT_R16_SNORM:
        packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
        break;
    case VK_FORMAT_R8_SNORM:
        packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
        break;
    case VK_FORMAT_R8G8_SINT:
    case VK_FORMAT_R8G8_UINT:
        packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
        break;
    case VK_FORMAT_R16_SINT:
    case VK_FORMAT_R16_UINT:
        packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
        break;
    case VK_FORMAT_R8_SINT:
    case VK_FORMAT_R8_UINT:
        packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
        break;
    case VK_FORMAT_A2B10G10R10_UINT_PACK32:
        packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
                    (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
                    (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
                    (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
        break;
    default:
        UNSUPPORTED("VkFormat %d", int(imageFormat));
        break;
    }

    // "The integer texel coordinates are validated according to the same rules as for texel input coordinate
    // validation. If the texel fails integer texel coordinate validation, then the write has no effect."
    // - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
    auto robustness = OutOfBoundsBehavior::Nullify;
    // GetTexelAddress() only needs the EmitState* for SubpassData accesses (i.e. input attachments).
    const EmitState *state = nullptr;

    SIMD::Int uvwa[4];
    SIMD::Int sample;

    uint32_t i = 0;
    for(; i < instruction.coordinates; i++)
    {
        uvwa[i] = As<SIMD::Int>(coord[i]);
    }

    if(instruction.sample)
    {
        sample = As<SIMD::Int>(coord[i]);
    }

    auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

    const int texelSize = imageFormat.bytes();

    // Scatter packed texel data.
    // TODO(b/160531165): Provide scatter abstractions for various element sizes.
    if(texelSize == 4 || texelSize == 8 || texelSize == 16)
    {
        for(auto i = 0; i < texelSize / 4; i++)
        {
            texelPtr.Store(packed[i], robustness, mask);
            texelPtr += sizeof(float);
        }
    }
    else if(texelSize == 2)
    {
        SIMD::Int offsets = texelPtr.offsets();
        mask = mask & texelPtr.isInBounds(2, robustness);

        for(int i = 0; i < SIMD::Width; i++)
        {
            If(Extract(mask, i) != 0)
            {
                *Pointer<Short>(texelPtr.base + Extract(offsets, i)) = Short(Extract(packed[0], i));
            }
        }
    }
    else if(texelSize == 1)
    {
        SIMD::Int offsets = texelPtr.offsets();
        mask = mask & texelPtr.isInBounds(1, robustness);

        for(int i = 0; i < SIMD::Width; i++)
        {
            If(Extract(mask, i) != 0)
            {
                *Pointer<Byte>(texelPtr.base + Extract(offsets, i)) = Byte(Extract(packed[0], i));
            }
        }
    }
    else
        UNREACHABLE("texelSize: %d", int(texelSize));
}

SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(const ImageInstruction &instruction, EmitState *state) const
{
    auto coordinate = Operand(this, state, instruction.coordinateId);

    Pointer<Byte> descriptor = state->getPointer(instruction.imageId).base;  // vk::StorageImageDescriptor*

    // VK_EXT_image_robustness requires checking for out-of-bounds accesses.
    // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
    auto robustness = OutOfBoundsBehavior::Nullify;
    vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

    SIMD::Int uvwa[4];

    for(uint32_t i = 0; i < instruction.coordinates; i++)
    {
        uvwa[i] = coordinate.Int(i);
    }

    SIMD::Int sample = Operand(this, state, instruction.sampleId).Int(0);

    auto ptr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, state);

    state->createPointer(instruction.resultId, ptr);

    return EmitResult::Continue;
}

SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
{
    // Propagate the image pointer in both cases.
    // Consumers of OpSampledImage will look through to find the sampler pointer.

    Object::ID resultId = insn.word(2);
    Object::ID imageId = insn.word(3);

    state->createPointer(resultId, state->getPointer(imageId));

    return EmitResult::Continue;
}

}  // namespace sw