1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "SpirvShader.hpp"
16
17 #include "SpirvProfiler.hpp"
18 #include "SpirvShaderDebug.hpp"
19
20 #include "System/Debug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Vulkan/VkRenderPass.hpp"
23
24 #include "marl/defer.h"
25
26 #include <spirv/unified1/spirv.hpp>
27
28 namespace sw {
29
SpirvShader(VkShaderStageFlagBits pipelineStage,const char * entryPointName,SpirvBinary const & insns,const vk::RenderPass * renderPass,uint32_t subpassIndex,bool robustBufferAccess,const std::shared_ptr<vk::dbg::Context> & dbgctx,std::shared_ptr<SpirvProfiler> profiler)30 SpirvShader::SpirvShader(
31 VkShaderStageFlagBits pipelineStage,
32 const char *entryPointName,
33 SpirvBinary const &insns,
34 const vk::RenderPass *renderPass,
35 uint32_t subpassIndex,
36 bool robustBufferAccess,
37 const std::shared_ptr<vk::dbg::Context> &dbgctx,
38 std::shared_ptr<SpirvProfiler> profiler)
39 : insns{ insns }
40 , inputs{ MAX_INTERFACE_COMPONENTS }
41 , outputs{ MAX_INTERFACE_COMPONENTS }
42 , robustBufferAccess(robustBufferAccess)
43 , profiler(profiler)
44 {
45 ASSERT(insns.size() > 0);
46
47 if(dbgctx)
48 {
49 dbgInit(dbgctx);
50 }
51
52 if(renderPass)
53 {
54 // capture formats of any input attachments present
55 auto subpass = renderPass->getSubpass(subpassIndex);
56 inputAttachmentFormats.reserve(subpass.inputAttachmentCount);
57 for(auto i = 0u; i < subpass.inputAttachmentCount; i++)
58 {
59 auto attachmentIndex = subpass.pInputAttachments[i].attachment;
60 inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
61 ? renderPass->getAttachment(attachmentIndex).format
62 : VK_FORMAT_UNDEFINED);
63 }
64 }
65
66 // The identifiers of all OpVariables that define the entry point's IO variables.
67 std::unordered_set<Object::ID> interfaceIds;
68
69 Function::ID currentFunction;
70 Block::ID currentBlock;
71 InsnIterator blockStart;
72
73 for(auto insn : *this)
74 {
75 spv::Op opcode = insn.opcode();
76
77 switch(opcode)
78 {
79 case spv::OpEntryPoint:
80 {
81 spv::ExecutionModel executionModel = spv::ExecutionModel(insn.word(1));
82 Function::ID entryPoint = Function::ID(insn.word(2));
83 const char *name = insn.string(3);
84 VkShaderStageFlagBits stage = executionModelToStage(executionModel);
85
86 if(stage == pipelineStage && strcmp(name, entryPointName) == 0)
87 {
88 ASSERT_MSG(this->entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
89 this->entryPoint = entryPoint;
90 this->executionModel = executionModel;
91
92 auto interfaceIdsOffset = 3 + insn.stringSizeInWords(3);
93 for(uint32_t i = interfaceIdsOffset; i < insn.wordCount(); i++)
94 {
95 interfaceIds.emplace(insn.word(i));
96 }
97 }
98 }
99 break;
100
101 case spv::OpExecutionMode:
102 case spv::OpExecutionModeId:
103 ProcessExecutionMode(insn);
104 break;
105
106 case spv::OpDecorate:
107 {
108 TypeOrObjectID targetId = insn.word(1);
109 auto decoration = static_cast<spv::Decoration>(insn.word(2));
110 uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
111
112 decorations[targetId].Apply(decoration, value);
113
114 switch(decoration)
115 {
116 case spv::DecorationDescriptorSet:
117 descriptorDecorations[targetId].DescriptorSet = value;
118 break;
119 case spv::DecorationBinding:
120 descriptorDecorations[targetId].Binding = value;
121 break;
122 case spv::DecorationInputAttachmentIndex:
123 descriptorDecorations[targetId].InputAttachmentIndex = value;
124 break;
125 case spv::DecorationSample:
126 analysis.ContainsSampleQualifier = true;
127 break;
128 default:
129 // Only handling descriptor decorations here.
130 break;
131 }
132
133 if(decoration == spv::DecorationCentroid)
134 {
135 analysis.NeedsCentroid = true;
136 }
137 }
138 break;
139
140 case spv::OpMemberDecorate:
141 {
142 Type::ID targetId = insn.word(1);
143 auto memberIndex = insn.word(2);
144 auto decoration = static_cast<spv::Decoration>(insn.word(3));
145 uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
146
147 auto &d = memberDecorations[targetId];
148 if(memberIndex >= d.size())
149 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
150
151 d[memberIndex].Apply(decoration, value);
152
153 if(decoration == spv::DecorationCentroid)
154 {
155 analysis.NeedsCentroid = true;
156 }
157 }
158 break;
159
160 case spv::OpDecorateId:
161 {
162 auto decoration = static_cast<spv::Decoration>(insn.word(2));
163
164 // Currently OpDecorateId only supports UniformId, which provides information for
165 // potential optimizations that we don't perform, and CounterBuffer, which is used
166 // by HLSL to build the graphics pipeline with shader reflection. At the driver level,
167 // the CounterBuffer decoration does nothing, so we can safely ignore both decorations.
168 ASSERT(decoration == spv::DecorationUniformId || decoration == spv::DecorationCounterBuffer);
169 }
170 break;
171
172 case spv::OpDecorateString:
173 {
174 auto decoration = static_cast<spv::Decoration>(insn.word(2));
175
176 // We assume these are for HLSL semantics, ignore them (b/214576937).
177 ASSERT(decoration == spv::DecorationUserSemantic || decoration == spv::DecorationUserTypeGOOGLE);
178 }
179 break;
180
181 case spv::OpMemberDecorateString:
182 {
183 auto decoration = static_cast<spv::Decoration>(insn.word(3));
184
185 // We assume these are for HLSL semantics, ignore them (b/214576937).
186 ASSERT(decoration == spv::DecorationUserSemantic || decoration == spv::DecorationUserTypeGOOGLE);
187 }
188 break;
189
190 case spv::OpDecorationGroup:
191 // Nothing to do here. We don't need to record the definition of the group; we'll just have
192 // the bundle of decorations float around. If we were to ever walk the decorations directly,
193 // we might think about introducing this as a real Object.
194 break;
195
196 case spv::OpGroupDecorate:
197 {
198 uint32_t group = insn.word(1);
199 auto const &groupDecorations = decorations[group];
200 auto const &descriptorGroupDecorations = descriptorDecorations[group];
201 for(auto i = 2u; i < insn.wordCount(); i++)
202 {
203 // Remaining operands are targets to apply the group to.
204 uint32_t target = insn.word(i);
205 decorations[target].Apply(groupDecorations);
206 descriptorDecorations[target].Apply(descriptorGroupDecorations);
207 }
208 }
209 break;
210
211 case spv::OpGroupMemberDecorate:
212 {
213 auto const &srcDecorations = decorations[insn.word(1)];
214 for(auto i = 2u; i < insn.wordCount(); i += 2)
215 {
216 // remaining operands are pairs of <id>, literal for members to apply to.
217 auto &d = memberDecorations[insn.word(i)];
218 auto memberIndex = insn.word(i + 1);
219 if(memberIndex >= d.size())
220 d.resize(memberIndex + 1); // on demand resize, see above...
221 d[memberIndex].Apply(srcDecorations);
222 }
223 }
224 break;
225
226 case spv::OpLabel:
227 {
228 ASSERT(currentBlock == 0);
229 currentBlock = Block::ID(insn.word(1));
230 blockStart = insn;
231 }
232 break;
233
234 // Termination instructions:
235 case spv::OpKill:
236 case spv::OpTerminateInvocation:
237 analysis.ContainsDiscard = true;
238 // [[fallthrough]]
239
240 case spv::OpUnreachable:
241
242 // Branch Instructions (subset of Termination Instructions):
243 case spv::OpBranch:
244 case spv::OpBranchConditional:
245 case spv::OpSwitch:
246 case spv::OpReturn:
247 {
248 ASSERT(currentBlock != 0);
249 ASSERT(currentFunction != 0);
250
251 auto blockEnd = insn;
252 blockEnd++;
253 functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
254 currentBlock = Block::ID(0);
255 }
256 break;
257
258 case spv::OpDemoteToHelperInvocation:
259 analysis.ContainsDiscard = true;
260 break;
261
262 case spv::OpLoopMerge:
263 case spv::OpSelectionMerge:
264 break; // Nothing to do in analysis pass.
265
266 case spv::OpTypeVoid:
267 case spv::OpTypeBool:
268 case spv::OpTypeInt:
269 case spv::OpTypeFloat:
270 case spv::OpTypeVector:
271 case spv::OpTypeMatrix:
272 case spv::OpTypeImage:
273 case spv::OpTypeSampler:
274 case spv::OpTypeSampledImage:
275 case spv::OpTypeArray:
276 case spv::OpTypeRuntimeArray:
277 case spv::OpTypeStruct:
278 case spv::OpTypePointer:
279 case spv::OpTypeFunction:
280 DeclareType(insn);
281 break;
282
283 case spv::OpVariable:
284 {
285 Type::ID typeId = insn.word(1);
286 Object::ID resultId = insn.word(2);
287 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
288
289 auto &object = defs[resultId];
290 object.kind = Object::Kind::Pointer;
291 object.definition = insn;
292
293 ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
294 ASSERT(getType(typeId).storageClass == storageClass);
295
296 switch(storageClass)
297 {
298 case spv::StorageClassInput:
299 case spv::StorageClassOutput:
300 if(interfaceIds.count(resultId))
301 {
302 ProcessInterfaceVariable(object);
303 }
304 break;
305
306 case spv::StorageClassUniform:
307 case spv::StorageClassStorageBuffer:
308 object.kind = Object::Kind::DescriptorSet;
309 break;
310
311 case spv::StorageClassPushConstant:
312 case spv::StorageClassPrivate:
313 case spv::StorageClassFunction:
314 case spv::StorageClassUniformConstant:
315 break; // Correctly handled.
316
317 case spv::StorageClassWorkgroup:
318 {
319 auto &elTy = getType(getType(typeId).element);
320 auto sizeInBytes = elTy.componentCount * static_cast<uint32_t>(sizeof(float));
321 workgroupMemory.allocate(resultId, sizeInBytes);
322 object.kind = Object::Kind::Pointer;
323 }
324 break;
325 case spv::StorageClassAtomicCounter:
326 case spv::StorageClassImage:
327 UNSUPPORTED("StorageClass %d not yet supported", (int)storageClass);
328 break;
329
330 case spv::StorageClassCrossWorkgroup:
331 UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
332 break;
333
334 case spv::StorageClassGeneric:
335 UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
336 break;
337
338 default:
339 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
340 break;
341 }
342 }
343 break;
344
345 case spv::OpConstant:
346 case spv::OpSpecConstant:
347 CreateConstant(insn).constantValue[0] = insn.word(3);
348 break;
349 case spv::OpConstantFalse:
350 case spv::OpSpecConstantFalse:
351 CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero.
352 break;
353 case spv::OpConstantTrue:
354 case spv::OpSpecConstantTrue:
355 CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set.
356 break;
357 case spv::OpConstantNull:
358 case spv::OpUndef:
359 {
360 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
361 // OpConstantNull forms a constant of arbitrary type, all zeros.
362 auto &object = CreateConstant(insn);
363 auto &objectTy = getType(object);
364 for(auto i = 0u; i < objectTy.componentCount; i++)
365 {
366 object.constantValue[i] = 0;
367 }
368 }
369 break;
370 case spv::OpConstantComposite:
371 case spv::OpSpecConstantComposite:
372 {
373 auto &object = CreateConstant(insn);
374 auto offset = 0u;
375 for(auto i = 0u; i < insn.wordCount() - 3; i++)
376 {
377 auto &constituent = getObject(insn.word(i + 3));
378 auto &constituentTy = getType(constituent);
379 for(auto j = 0u; j < constituentTy.componentCount; j++)
380 {
381 object.constantValue[offset++] = constituent.constantValue[j];
382 }
383 }
384
385 auto objectId = Object::ID(insn.word(2));
386 auto decorationsIt = decorations.find(objectId);
387 if(decorationsIt != decorations.end() &&
388 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
389 {
390 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
391 // Decorating an object with the WorkgroupSize built-in
392 // decoration will make that object contain the dimensions
393 // of a local workgroup. If an object is decorated with the
394 // WorkgroupSize decoration, this must take precedence over
395 // any execution mode set for LocalSize.
396 // The object decorated with WorkgroupSize must be declared
397 // as a three-component vector of 32-bit integers.
398 ASSERT(getType(object).componentCount == 3);
399 executionModes.WorkgroupSizeX = object.constantValue[0];
400 executionModes.WorkgroupSizeY = object.constantValue[1];
401 executionModes.WorkgroupSizeZ = object.constantValue[2];
402 executionModes.useWorkgroupSizeId = false;
403 }
404 }
405 break;
406 case spv::OpSpecConstantOp:
407 EvalSpecConstantOp(insn);
408 break;
409
410 case spv::OpCapability:
411 {
412 auto capability = static_cast<spv::Capability>(insn.word(1));
413 switch(capability)
414 {
415 case spv::CapabilityMatrix: capabilities.Matrix = true; break;
416 case spv::CapabilityShader: capabilities.Shader = true; break;
417 case spv::CapabilityStorageImageMultisample: capabilities.StorageImageMultisample = true; break;
418 case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break;
419 case spv::CapabilityCullDistance: capabilities.CullDistance = true; break;
420 case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break;
421 case spv::CapabilitySampleRateShading: capabilities.SampleRateShading = true; break;
422 case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
423 case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
424 case spv::CapabilityImage1D: capabilities.Image1D = true; break;
425 case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
426 case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break;
427 case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
428 case spv::CapabilityImageMSArray: capabilities.ImageMSArray = true; break;
429 case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
430 case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
431 case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
432 case spv::CapabilityDotProductInputAll: capabilities.DotProductInputAll = true; break;
433 case spv::CapabilityDotProductInput4x8Bit: capabilities.DotProductInput4x8Bit = true; break;
434 case spv::CapabilityDotProductInput4x8BitPacked: capabilities.DotProductInput4x8BitPacked = true; break;
435 case spv::CapabilityDotProduct: capabilities.DotProduct = true; break;
436 case spv::CapabilityInterpolationFunction: capabilities.InterpolationFunction = true; break;
437 case spv::CapabilityStorageImageWriteWithoutFormat: capabilities.StorageImageWriteWithoutFormat = true; break;
438 case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
439 case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
440 case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break;
441 case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
442 case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
443 case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
444 case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
445 case spv::CapabilityMultiView: capabilities.MultiView = true; break;
446 case spv::CapabilityDemoteToHelperInvocation: capabilities.DemoteToHelperInvocation = true; break;
447 case spv::CapabilityStencilExportEXT: capabilities.StencilExportEXT = true; break;
448 case spv::CapabilityVulkanMemoryModel: capabilities.VulkanMemoryModel = true; break;
449 case spv::CapabilityVulkanMemoryModelDeviceScope: capabilities.VulkanMemoryModelDeviceScope = true; break;
450 default:
451 UNSUPPORTED("Unsupported capability %u", insn.word(1));
452 }
453
454 // Various capabilities will be declared, but none affect our code generation at this point.
455 }
456 break;
457
458 case spv::OpMemoryModel:
459 {
460 addressingModel = static_cast<spv::AddressingModel>(insn.word(1));
461 memoryModel = static_cast<spv::MemoryModel>(insn.word(2));
462 }
463 break;
464
465 case spv::OpFunction:
466 {
467 auto functionId = Function::ID(insn.word(2));
468 ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
469 currentFunction = functionId;
470 auto &function = functions[functionId];
471 function.result = Type::ID(insn.word(1));
472 function.type = Type::ID(insn.word(4));
473 // Scan forward to find the function's label.
474 for(auto it = insn; it != end(); it++)
475 {
476 if(it.opcode() == spv::OpLabel)
477 {
478 function.entry = Block::ID(it.word(1));
479 break;
480 }
481 }
482 ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
483 }
484 break;
485
486 case spv::OpFunctionEnd:
487 currentFunction = 0;
488 break;
489
490 case spv::OpExtInstImport:
491 {
492 static constexpr std::pair<const char *, Extension::Name> extensionsByName[] = {
493 { "GLSL.std.450", Extension::GLSLstd450 },
494 { "OpenCL.DebugInfo.100", Extension::OpenCLDebugInfo100 },
495 { "NonSemantic.", Extension::NonSemanticInfo },
496 };
497 static constexpr auto extensionCount = sizeof(extensionsByName) / sizeof(extensionsByName[0]);
498
499 auto id = Extension::ID(insn.word(1));
500 auto name = insn.string(2);
501 auto ext = Extension{ Extension::Unknown };
502 for(size_t i = 0; i < extensionCount; i++)
503 {
504 if(0 == strncmp(name, extensionsByName[i].first, strlen(extensionsByName[i].first)))
505 {
506 ext = Extension{ extensionsByName[i].second };
507 break;
508 }
509 }
510 if(ext.name == Extension::Unknown)
511 {
512 UNSUPPORTED("SPIR-V Extension: %s", name);
513 break;
514 }
515 extensionsByID.emplace(id, ext);
516 extensionsImported.emplace(ext.name);
517 }
518 break;
519 case spv::OpName:
520 case spv::OpMemberName:
521 case spv::OpSource:
522 case spv::OpSourceContinued:
523 case spv::OpSourceExtension:
524 case spv::OpLine:
525 case spv::OpNoLine:
526 case spv::OpModuleProcessed:
527 // No semantic impact
528 break;
529
530 case spv::OpString:
531 strings.emplace(insn.word(1), insn.string(2));
532 break;
533
534 case spv::OpFunctionParameter:
535 // These should have all been removed by preprocessing passes. If we see them here,
536 // our assumptions are wrong and we will probably generate wrong code.
537 UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode));
538 break;
539
540 case spv::OpFunctionCall:
541 // TODO(b/141246700): Add full support for spv::OpFunctionCall
542 break;
543
544 case spv::OpFConvert:
545 UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
546 break;
547
548 case spv::OpSConvert:
549 UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
550 break;
551
552 case spv::OpUConvert:
553 UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
554 break;
555
556 case spv::OpLoad:
557 case spv::OpAccessChain:
558 case spv::OpInBoundsAccessChain:
559 case spv::OpSampledImage:
560 case spv::OpImage:
561 {
562 // Propagate the descriptor decorations to the result.
563 Object::ID resultId = insn.word(2);
564 Object::ID pointerId = insn.word(3);
565 const auto &d = descriptorDecorations.find(pointerId);
566
567 if(d != descriptorDecorations.end())
568 {
569 descriptorDecorations[resultId] = d->second;
570 }
571
572 DefineResult(insn);
573
574 if(opcode == spv::OpAccessChain || opcode == spv::OpInBoundsAccessChain)
575 {
576 Decorations dd{};
577 ApplyDecorationsForAccessChain(&dd, &descriptorDecorations[resultId], pointerId, Span(insn, 4, insn.wordCount() - 4));
578 // Note: offset is the one thing that does *not* propagate, as the access chain accounts for it.
579 dd.HasOffset = false;
580 decorations[resultId].Apply(dd);
581 }
582 }
583 break;
584
585 case spv::OpCompositeConstruct:
586 case spv::OpCompositeInsert:
587 case spv::OpCompositeExtract:
588 case spv::OpVectorShuffle:
589 case spv::OpVectorTimesScalar:
590 case spv::OpMatrixTimesScalar:
591 case spv::OpMatrixTimesVector:
592 case spv::OpVectorTimesMatrix:
593 case spv::OpMatrixTimesMatrix:
594 case spv::OpOuterProduct:
595 case spv::OpTranspose:
596 case spv::OpVectorExtractDynamic:
597 case spv::OpVectorInsertDynamic:
598 // Unary ops
599 case spv::OpNot:
600 case spv::OpBitFieldInsert:
601 case spv::OpBitFieldSExtract:
602 case spv::OpBitFieldUExtract:
603 case spv::OpBitReverse:
604 case spv::OpBitCount:
605 case spv::OpSNegate:
606 case spv::OpFNegate:
607 case spv::OpLogicalNot:
608 case spv::OpQuantizeToF16:
609 // Binary ops
610 case spv::OpIAdd:
611 case spv::OpISub:
612 case spv::OpIMul:
613 case spv::OpSDiv:
614 case spv::OpUDiv:
615 case spv::OpFAdd:
616 case spv::OpFSub:
617 case spv::OpFMul:
618 case spv::OpFDiv:
619 case spv::OpFMod:
620 case spv::OpFRem:
621 case spv::OpFOrdEqual:
622 case spv::OpFUnordEqual:
623 case spv::OpFOrdNotEqual:
624 case spv::OpFUnordNotEqual:
625 case spv::OpFOrdLessThan:
626 case spv::OpFUnordLessThan:
627 case spv::OpFOrdGreaterThan:
628 case spv::OpFUnordGreaterThan:
629 case spv::OpFOrdLessThanEqual:
630 case spv::OpFUnordLessThanEqual:
631 case spv::OpFOrdGreaterThanEqual:
632 case spv::OpFUnordGreaterThanEqual:
633 case spv::OpSMod:
634 case spv::OpSRem:
635 case spv::OpUMod:
636 case spv::OpIEqual:
637 case spv::OpINotEqual:
638 case spv::OpUGreaterThan:
639 case spv::OpSGreaterThan:
640 case spv::OpUGreaterThanEqual:
641 case spv::OpSGreaterThanEqual:
642 case spv::OpULessThan:
643 case spv::OpSLessThan:
644 case spv::OpULessThanEqual:
645 case spv::OpSLessThanEqual:
646 case spv::OpShiftRightLogical:
647 case spv::OpShiftRightArithmetic:
648 case spv::OpShiftLeftLogical:
649 case spv::OpBitwiseOr:
650 case spv::OpBitwiseXor:
651 case spv::OpBitwiseAnd:
652 case spv::OpLogicalOr:
653 case spv::OpLogicalAnd:
654 case spv::OpLogicalEqual:
655 case spv::OpLogicalNotEqual:
656 case spv::OpUMulExtended:
657 case spv::OpSMulExtended:
658 case spv::OpIAddCarry:
659 case spv::OpISubBorrow:
660 case spv::OpDot:
661 case spv::OpSDot:
662 case spv::OpUDot:
663 case spv::OpSUDot:
664 case spv::OpSDotAccSat:
665 case spv::OpUDotAccSat:
666 case spv::OpSUDotAccSat:
667 case spv::OpConvertFToU:
668 case spv::OpConvertFToS:
669 case spv::OpConvertSToF:
670 case spv::OpConvertUToF:
671 case spv::OpBitcast:
672 case spv::OpSelect:
673 case spv::OpIsInf:
674 case spv::OpIsNan:
675 case spv::OpAny:
676 case spv::OpAll:
677 case spv::OpDPdx:
678 case spv::OpDPdxCoarse:
679 case spv::OpDPdy:
680 case spv::OpDPdyCoarse:
681 case spv::OpFwidth:
682 case spv::OpFwidthCoarse:
683 case spv::OpDPdxFine:
684 case spv::OpDPdyFine:
685 case spv::OpFwidthFine:
686 case spv::OpAtomicLoad:
687 case spv::OpAtomicIAdd:
688 case spv::OpAtomicISub:
689 case spv::OpAtomicSMin:
690 case spv::OpAtomicSMax:
691 case spv::OpAtomicUMin:
692 case spv::OpAtomicUMax:
693 case spv::OpAtomicAnd:
694 case spv::OpAtomicOr:
695 case spv::OpAtomicXor:
696 case spv::OpAtomicIIncrement:
697 case spv::OpAtomicIDecrement:
698 case spv::OpAtomicExchange:
699 case spv::OpAtomicCompareExchange:
700 case spv::OpPhi:
701 case spv::OpImageSampleImplicitLod:
702 case spv::OpImageSampleExplicitLod:
703 case spv::OpImageSampleDrefImplicitLod:
704 case spv::OpImageSampleDrefExplicitLod:
705 case spv::OpImageSampleProjImplicitLod:
706 case spv::OpImageSampleProjExplicitLod:
707 case spv::OpImageSampleProjDrefImplicitLod:
708 case spv::OpImageSampleProjDrefExplicitLod:
709 case spv::OpImageGather:
710 case spv::OpImageDrefGather:
711 case spv::OpImageFetch:
712 case spv::OpImageQuerySizeLod:
713 case spv::OpImageQuerySize:
714 case spv::OpImageQueryLod:
715 case spv::OpImageQueryLevels:
716 case spv::OpImageQuerySamples:
717 case spv::OpImageRead:
718 case spv::OpImageTexelPointer:
719 case spv::OpGroupNonUniformElect:
720 case spv::OpGroupNonUniformAll:
721 case spv::OpGroupNonUniformAny:
722 case spv::OpGroupNonUniformAllEqual:
723 case spv::OpGroupNonUniformBroadcast:
724 case spv::OpGroupNonUniformBroadcastFirst:
725 case spv::OpGroupNonUniformBallot:
726 case spv::OpGroupNonUniformInverseBallot:
727 case spv::OpGroupNonUniformBallotBitExtract:
728 case spv::OpGroupNonUniformBallotBitCount:
729 case spv::OpGroupNonUniformBallotFindLSB:
730 case spv::OpGroupNonUniformBallotFindMSB:
731 case spv::OpGroupNonUniformShuffle:
732 case spv::OpGroupNonUniformShuffleXor:
733 case spv::OpGroupNonUniformShuffleUp:
734 case spv::OpGroupNonUniformShuffleDown:
735 case spv::OpGroupNonUniformIAdd:
736 case spv::OpGroupNonUniformFAdd:
737 case spv::OpGroupNonUniformIMul:
738 case spv::OpGroupNonUniformFMul:
739 case spv::OpGroupNonUniformSMin:
740 case spv::OpGroupNonUniformUMin:
741 case spv::OpGroupNonUniformFMin:
742 case spv::OpGroupNonUniformSMax:
743 case spv::OpGroupNonUniformUMax:
744 case spv::OpGroupNonUniformFMax:
745 case spv::OpGroupNonUniformBitwiseAnd:
746 case spv::OpGroupNonUniformBitwiseOr:
747 case spv::OpGroupNonUniformBitwiseXor:
748 case spv::OpGroupNonUniformLogicalAnd:
749 case spv::OpGroupNonUniformLogicalOr:
750 case spv::OpGroupNonUniformLogicalXor:
751 case spv::OpCopyObject:
752 case spv::OpCopyLogical:
753 case spv::OpArrayLength:
754 case spv::OpIsHelperInvocationEXT:
755 // Instructions that yield an intermediate value or divergent pointer
756 DefineResult(insn);
757 break;
758
759 case spv::OpExtInst:
760 switch(getExtension(insn.word(3)).name)
761 {
762 case Extension::GLSLstd450:
763 DefineResult(insn);
764 break;
765 case Extension::OpenCLDebugInfo100:
766 DefineOpenCLDebugInfo100(insn);
767 break;
768 case Extension::NonSemanticInfo:
769 // An extended set name which is prefixed with "NonSemantic." is
770 // guaranteed to contain only non-semantic instructions and all
771 // OpExtInst instructions referencing this set can be ignored.
772 break;
773 default:
774 UNREACHABLE("Unexpected Extension name %d", int(getExtension(insn.word(3)).name));
775 break;
776 }
777 break;
778
779 case spv::OpStore:
780 case spv::OpAtomicStore:
781 case spv::OpImageWrite:
782 case spv::OpCopyMemory:
783 case spv::OpMemoryBarrier:
784 // Don't need to do anything during analysis pass
785 break;
786
787 case spv::OpControlBarrier:
788 analysis.ContainsControlBarriers = true;
789 break;
790
791 case spv::OpExtension:
792 {
793 const char *ext = insn.string(1);
794 // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
795 // extension per Appendix A, `Vulkan Environment for SPIR-V`.
796 if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
797 if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
798 if(!strcmp(ext, "SPV_KHR_16bit_storage")) break;
799 if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
800 if(!strcmp(ext, "SPV_KHR_device_group")) break;
801 if(!strcmp(ext, "SPV_KHR_multiview")) break;
802 if(!strcmp(ext, "SPV_EXT_demote_to_helper_invocation")) break;
803 if(!strcmp(ext, "SPV_KHR_terminate_invocation")) break;
804 if(!strcmp(ext, "SPV_EXT_shader_stencil_export")) break;
805 if(!strcmp(ext, "SPV_KHR_float_controls")) break;
806 if(!strcmp(ext, "SPV_KHR_integer_dot_product")) break;
807 if(!strcmp(ext, "SPV_KHR_non_semantic_info")) break;
808 if(!strcmp(ext, "SPV_KHR_vulkan_memory_model")) break;
809 if(!strcmp(ext, "SPV_GOOGLE_decorate_string")) break;
810 if(!strcmp(ext, "SPV_GOOGLE_hlsl_functionality1")) break;
811 if(!strcmp(ext, "SPV_GOOGLE_user_type")) break;
812 UNSUPPORTED("SPIR-V Extension: %s", ext);
813 }
814 break;
815
816 default:
817 UNSUPPORTED("%s", OpcodeName(opcode));
818 }
819 }
820
821 ASSERT_MSG(entryPoint != 0, "Entry point '%s' not found", entryPointName);
822 for(auto &it : functions)
823 {
824 it.second.AssignBlockFields();
825 }
826
827 #ifdef SPIRV_SHADER_CFG_GRAPHVIZ_DOT_FILEPATH
828 {
829 char path[1024];
830 snprintf(path, sizeof(path), SPIRV_SHADER_CFG_GRAPHVIZ_DOT_FILEPATH, codeSerialID);
831 WriteCFGGraphVizDotFile(path);
832 }
833 #endif
834
835 dbgCreateFile();
836 }
837
~SpirvShader()838 SpirvShader::~SpirvShader()
839 {
840 dbgTerm();
841 }
842
DeclareType(InsnIterator insn)843 void SpirvShader::DeclareType(InsnIterator insn)
844 {
845 Type::ID resultId = insn.word(1);
846
847 auto &type = types[resultId];
848 type.definition = insn;
849 type.componentCount = ComputeTypeSize(insn);
850
851 // A structure is a builtin block if it has a builtin
852 // member. All members of such a structure are builtins.
853 switch(insn.opcode())
854 {
855 case spv::OpTypeStruct:
856 {
857 auto d = memberDecorations.find(resultId);
858 if(d != memberDecorations.end())
859 {
860 for(auto &m : d->second)
861 {
862 if(m.HasBuiltIn)
863 {
864 type.isBuiltInBlock = true;
865 break;
866 }
867 }
868 }
869 }
870 break;
871 case spv::OpTypePointer:
872 {
873 Type::ID elementTypeId = insn.word(3);
874 type.element = elementTypeId;
875 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
876 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
877 }
878 break;
879 case spv::OpTypeVector:
880 case spv::OpTypeMatrix:
881 case spv::OpTypeArray:
882 case spv::OpTypeRuntimeArray:
883 {
884 Type::ID elementTypeId = insn.word(2);
885 type.element = elementTypeId;
886 }
887 break;
888 default:
889 break;
890 }
891 }
892
CreateConstant(InsnIterator insn)893 SpirvShader::Object &SpirvShader::CreateConstant(InsnIterator insn)
894 {
895 Type::ID typeId = insn.word(1);
896 Object::ID resultId = insn.word(2);
897 auto &object = defs[resultId];
898 auto &objectTy = getType(typeId);
899 object.kind = Object::Kind::Constant;
900 object.definition = insn;
901 object.constantValue.resize(objectTy.componentCount);
902
903 return object;
904 }
905
ProcessInterfaceVariable(Object & object)906 void SpirvShader::ProcessInterfaceVariable(Object &object)
907 {
908 auto &objectTy = getType(object);
909 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
910
911 ASSERT(objectTy.opcode() == spv::OpTypePointer);
912 auto pointeeTy = getType(objectTy.element);
913
914 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
915 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
916
917 ASSERT(object.opcode() == spv::OpVariable);
918 Object::ID resultId = object.definition.word(2);
919
920 if(objectTy.isBuiltInBlock)
921 {
922 // Walk the builtin block, registering each of its members separately.
923 auto m = memberDecorations.find(objectTy.element);
924 ASSERT(m != memberDecorations.end()); // Otherwise we wouldn't have marked the type chain
925 auto &structType = pointeeTy.definition;
926 auto memberIndex = 0u;
927 auto offset = 0u;
928
929 for(auto &member : m->second)
930 {
931 auto &memberType = getType(structType.word(2 + memberIndex));
932
933 if(member.HasBuiltIn)
934 {
935 builtinInterface[member.BuiltIn] = { resultId, offset, memberType.componentCount };
936 }
937
938 offset += memberType.componentCount;
939 ++memberIndex;
940 }
941
942 return;
943 }
944
945 auto d = decorations.find(resultId);
946 if(d != decorations.end() && d->second.HasBuiltIn)
947 {
948 builtinInterface[d->second.BuiltIn] = { resultId, 0, pointeeTy.componentCount };
949 }
950 else
951 {
952 object.kind = Object::Kind::InterfaceVariable;
953 VisitInterface(resultId,
954 [&userDefinedInterface](Decorations const &d, AttribType type) {
955 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
956 int32_t scalarSlot = (d.Location << 2) | d.Component;
957 ASSERT(scalarSlot >= 0 &&
958 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
959
960 auto &slot = userDefinedInterface[scalarSlot];
961 slot.Type = type;
962 slot.Flat = d.Flat;
963 slot.NoPerspective = d.NoPerspective;
964 slot.Centroid = d.Centroid;
965 });
966 }
967 }
968
GetNumInputComponents(int32_t location) const969 uint32_t SpirvShader::GetNumInputComponents(int32_t location) const
970 {
971 ASSERT(location >= 0);
972
973 // Verify how many component(s) per input
974 // 1 to 4, for float, vec2, vec3, vec4.
975 // Note that matrices are divided over multiple inputs
976 uint32_t num_components_per_input = 0;
977 for(; num_components_per_input < 4; ++num_components_per_input)
978 {
979 if(inputs[(location << 2) | num_components_per_input].Type == ATTRIBTYPE_UNUSED)
980 {
981 break;
982 }
983 }
984
985 return num_components_per_input;
986 }
987
GetPackedInterpolant(int32_t location) const988 uint32_t SpirvShader::GetPackedInterpolant(int32_t location) const
989 {
990 ASSERT(location >= 0);
991 const uint32_t maxInterpolant = (location << 2);
992
993 // Return the number of used components only at location
994 uint32_t packedInterpolant = 0;
995 for(uint32_t i = 0; i < maxInterpolant; ++i)
996 {
997 if(inputs[i].Type != ATTRIBTYPE_UNUSED)
998 {
999 ++packedInterpolant;
1000 }
1001 }
1002
1003 return packedInterpolant;
1004 }
1005
ProcessExecutionMode(InsnIterator insn)1006 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
1007 {
1008 Function::ID function = insn.word(1);
1009 if(function != entryPoint)
1010 {
1011 return;
1012 }
1013
1014 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
1015 switch(mode)
1016 {
1017 case spv::ExecutionModeEarlyFragmentTests:
1018 executionModes.EarlyFragmentTests = true;
1019 break;
1020 case spv::ExecutionModeDepthReplacing:
1021 executionModes.DepthReplacing = true;
1022 break;
1023 case spv::ExecutionModeDepthGreater:
1024 // TODO(b/177915067): Can be used to optimize depth test, currently unused.
1025 executionModes.DepthGreater = true;
1026 break;
1027 case spv::ExecutionModeDepthLess:
1028 // TODO(b/177915067): Can be used to optimize depth test, currently unused.
1029 executionModes.DepthLess = true;
1030 break;
1031 case spv::ExecutionModeDepthUnchanged:
1032 // TODO(b/177915067): Can be used to optimize depth test, currently unused.
1033 executionModes.DepthUnchanged = true;
1034 break;
1035 case spv::ExecutionModeLocalSize:
1036 case spv::ExecutionModeLocalSizeId:
1037 executionModes.WorkgroupSizeX = insn.word(3);
1038 executionModes.WorkgroupSizeY = insn.word(4);
1039 executionModes.WorkgroupSizeZ = insn.word(5);
1040 executionModes.useWorkgroupSizeId = (mode == spv::ExecutionModeLocalSizeId);
1041 break;
1042 case spv::ExecutionModeOriginUpperLeft:
1043 // This is always the case for a Vulkan shader. Do nothing.
1044 break;
1045 default:
1046 UNREACHABLE("Execution mode: %d", int(mode));
1047 }
1048 }
1049
getWorkgroupSizeX() const1050 uint32_t SpirvShader::getWorkgroupSizeX() const
1051 {
1052 return executionModes.useWorkgroupSizeId ? getObject(executionModes.WorkgroupSizeX).constantValue[0] : executionModes.WorkgroupSizeX.value();
1053 }
1054
getWorkgroupSizeY() const1055 uint32_t SpirvShader::getWorkgroupSizeY() const
1056 {
1057 return executionModes.useWorkgroupSizeId ? getObject(executionModes.WorkgroupSizeY).constantValue[0] : executionModes.WorkgroupSizeY.value();
1058 }
1059
getWorkgroupSizeZ() const1060 uint32_t SpirvShader::getWorkgroupSizeZ() const
1061 {
1062 return executionModes.useWorkgroupSizeId ? getObject(executionModes.WorkgroupSizeZ).constantValue[0] : executionModes.WorkgroupSizeZ.value();
1063 }
1064
ComputeTypeSize(InsnIterator insn)1065 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
1066 {
1067 // Types are always built from the bottom up (with the exception of forward ptrs, which
1068 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
1069 // already been described (and so their sizes determined)
1070 switch(insn.opcode())
1071 {
1072 case spv::OpTypeVoid:
1073 case spv::OpTypeSampler:
1074 case spv::OpTypeImage:
1075 case spv::OpTypeSampledImage:
1076 case spv::OpTypeFunction:
1077 case spv::OpTypeRuntimeArray:
1078 // Objects that don't consume any space.
1079 // Descriptor-backed objects currently only need exist at compile-time.
1080 // Runtime arrays don't appear in places where their size would be interesting
1081 return 0;
1082
1083 case spv::OpTypeBool:
1084 case spv::OpTypeFloat:
1085 case spv::OpTypeInt:
1086 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
1087 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
1088 return 1;
1089
1090 case spv::OpTypeVector:
1091 case spv::OpTypeMatrix:
1092 // Vectors and matrices both consume element count * element size.
1093 return getType(insn.word(2)).componentCount * insn.word(3);
1094
1095 case spv::OpTypeArray:
1096 {
1097 // Element count * element size. Array sizes come from constant ids.
1098 auto arraySize = GetConstScalarInt(insn.word(3));
1099 return getType(insn.word(2)).componentCount * arraySize;
1100 }
1101
1102 case spv::OpTypeStruct:
1103 {
1104 uint32_t size = 0;
1105 for(uint32_t i = 2u; i < insn.wordCount(); i++)
1106 {
1107 size += getType(insn.word(i)).componentCount;
1108 }
1109 return size;
1110 }
1111
1112 case spv::OpTypePointer:
1113 // Runtime representation of a pointer is a per-lane index.
1114 // Note: clients are expected to look through the pointer if they want the pointee size instead.
1115 return 1;
1116
1117 default:
1118 UNREACHABLE("%s", OpcodeName(insn.opcode()));
1119 return 0;
1120 }
1121 }
1122
VisitInterfaceInner(Type::ID id,Decorations d,const InterfaceVisitor & f) const1123 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &f) const
1124 {
1125 // Recursively walks variable definition and its type tree, taking into account
1126 // any explicit Location or Component decorations encountered; where explicit
1127 // Locations or Components are not specified, assigns them sequentially.
1128 // Collected decorations are carried down toward the leaves and across
1129 // siblings; Effect of decorations intentionally does not flow back up the tree.
1130 //
1131 // F is a functor to be called with the effective decoration set for every component.
1132 //
1133 // Returns the next available location, and calls f().
1134
1135 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
1136
1137 ApplyDecorationsForId(&d, id);
1138
1139 auto const &obj = getType(id);
1140 switch(obj.opcode())
1141 {
1142 case spv::OpTypePointer:
1143 return VisitInterfaceInner(obj.definition.word(3), d, f);
1144 case spv::OpTypeMatrix:
1145 for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
1146 {
1147 // consumes same components of N consecutive locations
1148 VisitInterfaceInner(obj.definition.word(2), d, f);
1149 }
1150 return d.Location;
1151 case spv::OpTypeVector:
1152 for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
1153 {
1154 // consumes N consecutive components in the same location
1155 VisitInterfaceInner(obj.definition.word(2), d, f);
1156 }
1157 return d.Location + 1;
1158 case spv::OpTypeFloat:
1159 f(d, ATTRIBTYPE_FLOAT);
1160 return d.Location + 1;
1161 case spv::OpTypeInt:
1162 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
1163 return d.Location + 1;
1164 case spv::OpTypeBool:
1165 f(d, ATTRIBTYPE_UINT);
1166 return d.Location + 1;
1167 case spv::OpTypeStruct:
1168 {
1169 // iterate over members, which may themselves have Location/Component decorations
1170 for(auto i = 0u; i < obj.definition.wordCount() - 2; i++)
1171 {
1172 Decorations dMember = d;
1173 ApplyDecorationsForIdMember(&dMember, id, i);
1174 d.Location = VisitInterfaceInner(obj.definition.word(i + 2), dMember, f);
1175 d.Component = 0; // Implicit locations always have component=0
1176 }
1177 return d.Location;
1178 }
1179 case spv::OpTypeArray:
1180 {
1181 auto arraySize = GetConstScalarInt(obj.definition.word(3));
1182 for(auto i = 0u; i < arraySize; i++)
1183 {
1184 d.Location = VisitInterfaceInner(obj.definition.word(2), d, f);
1185 }
1186 return d.Location;
1187 }
1188 default:
1189 // Intentionally partial; most opcodes do not participate in type hierarchies
1190 return 0;
1191 }
1192 }
1193
VisitInterface(Object::ID id,const InterfaceVisitor & f) const1194 void SpirvShader::VisitInterface(Object::ID id, const InterfaceVisitor &f) const
1195 {
1196 // Walk a variable definition and call f for each component in it.
1197 Decorations d = GetDecorationsForId(id);
1198
1199 auto def = getObject(id).definition;
1200 ASSERT(def.opcode() == spv::OpVariable);
1201 VisitInterfaceInner(def.word(1), d, f);
1202 }
1203
ApplyDecorationsForAccessChain(Decorations * d,DescriptorDecorations * dd,Object::ID baseId,const Span & indexIds) const1204 void SpirvShader::ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, const Span &indexIds) const
1205 {
1206 ApplyDecorationsForId(d, baseId);
1207 auto &baseObject = getObject(baseId);
1208 ApplyDecorationsForId(d, baseObject.typeId());
1209 auto typeId = getType(baseObject).element;
1210
1211 for(uint32_t i = 0; i < indexIds.size(); i++)
1212 {
1213 ApplyDecorationsForId(d, typeId);
1214 auto &type = getType(typeId);
1215 switch(type.opcode())
1216 {
1217 case spv::OpTypeStruct:
1218 {
1219 int memberIndex = GetConstScalarInt(indexIds[i]);
1220 ApplyDecorationsForIdMember(d, typeId, memberIndex);
1221 typeId = type.definition.word(2u + memberIndex);
1222 }
1223 break;
1224 case spv::OpTypeArray:
1225 case spv::OpTypeRuntimeArray:
1226 if(dd->InputAttachmentIndex >= 0)
1227 {
1228 dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
1229 }
1230 typeId = type.element;
1231 break;
1232 case spv::OpTypeVector:
1233 typeId = type.element;
1234 break;
1235 case spv::OpTypeMatrix:
1236 typeId = type.element;
1237 d->InsideMatrix = true;
1238 break;
1239 default:
1240 UNREACHABLE("%s", OpcodeName(type.definition.opcode()));
1241 }
1242 }
1243 }
1244
WalkExplicitLayoutAccessChain(Object::ID baseId,const Span & indexIds,const EmitState * state) const1245 SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, const Span &indexIds, const EmitState *state) const
1246 {
1247 // Produce a offset into external memory in sizeof(float) units
1248
1249 auto &baseObject = getObject(baseId);
1250 Type::ID typeId = getType(baseObject).element;
1251 Decorations d = GetDecorationsForId(baseObject.typeId());
1252
1253 Int arrayIndex = 0;
1254 uint32_t start = 0;
1255 if(baseObject.kind == Object::Kind::DescriptorSet)
1256 {
1257 auto type = getType(typeId).definition.opcode();
1258 if(type == spv::OpTypeArray || type == spv::OpTypeRuntimeArray)
1259 {
1260 auto &obj = getObject(indexIds[0]);
1261 ASSERT(obj.kind == Object::Kind::Constant || obj.kind == Object::Kind::Intermediate);
1262 if(obj.kind == Object::Kind::Constant)
1263 {
1264 arrayIndex = GetConstScalarInt(indexIds[0]);
1265 }
1266 else
1267 {
1268 // Note: the value of indexIds[0] must be dynamically uniform.
1269 arrayIndex = Extract(state->getIntermediate(indexIds[0]).Int(0), 0);
1270 }
1271
1272 start = 1;
1273 typeId = getType(typeId).element;
1274 }
1275 }
1276
1277 auto ptr = GetPointerToData(baseId, arrayIndex, state);
1278
1279 int constantOffset = 0;
1280
1281 for(uint32_t i = start; i < indexIds.size(); i++)
1282 {
1283 auto &type = getType(typeId);
1284 ApplyDecorationsForId(&d, typeId);
1285
1286 switch(type.definition.opcode())
1287 {
1288 case spv::OpTypeStruct:
1289 {
1290 int memberIndex = GetConstScalarInt(indexIds[i]);
1291 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
1292 ASSERT(d.HasOffset);
1293 constantOffset += d.Offset;
1294 typeId = type.definition.word(2u + memberIndex);
1295 }
1296 break;
1297 case spv::OpTypeArray:
1298 case spv::OpTypeRuntimeArray:
1299 {
1300 // TODO: b/127950082: Check bounds.
1301 ASSERT(d.HasArrayStride);
1302 auto &obj = getObject(indexIds[i]);
1303 if(obj.kind == Object::Kind::Constant)
1304 {
1305 constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
1306 }
1307 else
1308 {
1309 ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
1310 }
1311 typeId = type.element;
1312 }
1313 break;
1314 case spv::OpTypeMatrix:
1315 {
1316 // TODO: b/127950082: Check bounds.
1317 ASSERT(d.HasMatrixStride);
1318 d.InsideMatrix = true;
1319 auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
1320 auto &obj = getObject(indexIds[i]);
1321 if(obj.kind == Object::Kind::Constant)
1322 {
1323 constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
1324 }
1325 else
1326 {
1327 ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
1328 }
1329 typeId = type.element;
1330 }
1331 break;
1332 case spv::OpTypeVector:
1333 {
1334 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
1335 auto &obj = getObject(indexIds[i]);
1336 if(obj.kind == Object::Kind::Constant)
1337 {
1338 constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
1339 }
1340 else
1341 {
1342 ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
1343 }
1344 typeId = type.element;
1345 }
1346 break;
1347 default:
1348 UNREACHABLE("%s", OpcodeName(type.definition.opcode()));
1349 }
1350 }
1351
1352 ptr += constantOffset;
1353 return ptr;
1354 }
1355
WalkAccessChain(Object::ID baseId,const Span & indexIds,EmitState const * state) const1356 SIMD::Pointer SpirvShader::WalkAccessChain(Object::ID baseId, const Span &indexIds, EmitState const *state) const
1357 {
1358 // TODO: avoid doing per-lane work in some cases if we can?
1359 auto routine = state->routine;
1360 auto &baseObject = getObject(baseId);
1361 Type::ID typeId = getType(baseObject).element;
1362
1363 auto ptr = state->getPointer(baseId);
1364
1365 int constantOffset = 0;
1366
1367 for(uint32_t i = 0; i < indexIds.size(); i++)
1368 {
1369 auto &type = getType(typeId);
1370 switch(type.opcode())
1371 {
1372 case spv::OpTypeStruct:
1373 {
1374 int memberIndex = GetConstScalarInt(indexIds[i]);
1375 int offsetIntoStruct = 0;
1376 for(auto j = 0; j < memberIndex; j++)
1377 {
1378 auto memberType = type.definition.word(2u + j);
1379 offsetIntoStruct += getType(memberType).componentCount * sizeof(float);
1380 }
1381 constantOffset += offsetIntoStruct;
1382 typeId = type.definition.word(2u + memberIndex);
1383 }
1384 break;
1385
1386 case spv::OpTypeVector:
1387 case spv::OpTypeMatrix:
1388 case spv::OpTypeArray:
1389 case spv::OpTypeRuntimeArray:
1390 {
1391 // TODO(b/127950082): Check bounds.
1392 if(getType(baseObject).storageClass == spv::StorageClassUniformConstant)
1393 {
1394 // indexing into an array of descriptors.
1395 auto d = descriptorDecorations.at(baseId);
1396 ASSERT(d.DescriptorSet >= 0);
1397 ASSERT(d.Binding >= 0);
1398 uint32_t descriptorSize = routine->pipelineLayout->getDescriptorSize(d.DescriptorSet, d.Binding);
1399
1400 auto &obj = getObject(indexIds[i]);
1401 if(obj.kind == Object::Kind::Constant)
1402 {
1403 ptr.base += descriptorSize * GetConstScalarInt(indexIds[i]);
1404 }
1405 else
1406 {
1407 // Note: the value of indexIds[i] must be dynamically uniform.
1408 ptr.base += descriptorSize * Extract(state->getIntermediate(indexIds[i]).Int(0), 0);
1409 }
1410 }
1411 else
1412 {
1413 auto stride = getType(type.element).componentCount * static_cast<uint32_t>(sizeof(float));
1414 auto &obj = getObject(indexIds[i]);
1415 if(obj.kind == Object::Kind::Constant)
1416 {
1417 ptr += stride * GetConstScalarInt(indexIds[i]);
1418 }
1419 else
1420 {
1421 ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
1422 }
1423 }
1424 typeId = type.element;
1425 }
1426 break;
1427
1428 default:
1429 UNREACHABLE("%s", OpcodeName(type.opcode()));
1430 }
1431 }
1432
1433 if(constantOffset != 0)
1434 {
1435 ptr += constantOffset;
1436 }
1437 return ptr;
1438 }
1439
WalkLiteralAccessChain(Type::ID typeId,const Span & indexes) const1440 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, const Span &indexes) const
1441 {
1442 uint32_t componentOffset = 0;
1443
1444 for(uint32_t i = 0; i < indexes.size(); i++)
1445 {
1446 auto &type = getType(typeId);
1447 switch(type.opcode())
1448 {
1449 case spv::OpTypeStruct:
1450 {
1451 int memberIndex = indexes[i];
1452 int offsetIntoStruct = 0;
1453 for(auto j = 0; j < memberIndex; j++)
1454 {
1455 auto memberType = type.definition.word(2u + j);
1456 offsetIntoStruct += getType(memberType).componentCount;
1457 }
1458 componentOffset += offsetIntoStruct;
1459 typeId = type.definition.word(2u + memberIndex);
1460 }
1461 break;
1462
1463 case spv::OpTypeVector:
1464 case spv::OpTypeMatrix:
1465 case spv::OpTypeArray:
1466 {
1467 auto elementType = type.definition.word(2);
1468 auto stride = getType(elementType).componentCount;
1469 componentOffset += stride * indexes[i];
1470 typeId = elementType;
1471 }
1472 break;
1473
1474 default:
1475 UNREACHABLE("%s", OpcodeName(type.opcode()));
1476 }
1477 }
1478
1479 return componentOffset;
1480 }
1481
Apply(spv::Decoration decoration,uint32_t arg)1482 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
1483 {
1484 switch(decoration)
1485 {
1486 case spv::DecorationLocation:
1487 HasLocation = true;
1488 Location = static_cast<int32_t>(arg);
1489 break;
1490 case spv::DecorationComponent:
1491 HasComponent = true;
1492 Component = arg;
1493 break;
1494 case spv::DecorationBuiltIn:
1495 HasBuiltIn = true;
1496 BuiltIn = static_cast<spv::BuiltIn>(arg);
1497 break;
1498 case spv::DecorationFlat:
1499 Flat = true;
1500 break;
1501 case spv::DecorationNoPerspective:
1502 NoPerspective = true;
1503 break;
1504 case spv::DecorationCentroid:
1505 Centroid = true;
1506 break;
1507 case spv::DecorationBlock:
1508 Block = true;
1509 break;
1510 case spv::DecorationBufferBlock:
1511 BufferBlock = true;
1512 break;
1513 case spv::DecorationOffset:
1514 HasOffset = true;
1515 Offset = static_cast<int32_t>(arg);
1516 break;
1517 case spv::DecorationArrayStride:
1518 HasArrayStride = true;
1519 ArrayStride = static_cast<int32_t>(arg);
1520 break;
1521 case spv::DecorationMatrixStride:
1522 HasMatrixStride = true;
1523 MatrixStride = static_cast<int32_t>(arg);
1524 break;
1525 case spv::DecorationRelaxedPrecision:
1526 RelaxedPrecision = true;
1527 break;
1528 case spv::DecorationRowMajor:
1529 HasRowMajor = true;
1530 RowMajor = true;
1531 break;
1532 case spv::DecorationColMajor:
1533 HasRowMajor = true;
1534 RowMajor = false;
1535 default:
1536 // Intentionally partial, there are many decorations we just don't care about.
1537 break;
1538 }
1539 }
1540
Apply(const sw::SpirvShader::Decorations & src)1541 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1542 {
1543 // Apply a decoration group to this set of decorations
1544 if(src.HasBuiltIn)
1545 {
1546 HasBuiltIn = true;
1547 BuiltIn = src.BuiltIn;
1548 }
1549
1550 if(src.HasLocation)
1551 {
1552 HasLocation = true;
1553 Location = src.Location;
1554 }
1555
1556 if(src.HasComponent)
1557 {
1558 HasComponent = true;
1559 Component = src.Component;
1560 }
1561
1562 if(src.HasOffset)
1563 {
1564 HasOffset = true;
1565 Offset = src.Offset;
1566 }
1567
1568 if(src.HasArrayStride)
1569 {
1570 HasArrayStride = true;
1571 ArrayStride = src.ArrayStride;
1572 }
1573
1574 if(src.HasMatrixStride)
1575 {
1576 HasMatrixStride = true;
1577 MatrixStride = src.MatrixStride;
1578 }
1579
1580 if(src.HasRowMajor)
1581 {
1582 HasRowMajor = true;
1583 RowMajor = src.RowMajor;
1584 }
1585
1586 Flat |= src.Flat;
1587 NoPerspective |= src.NoPerspective;
1588 Centroid |= src.Centroid;
1589 Block |= src.Block;
1590 BufferBlock |= src.BufferBlock;
1591 RelaxedPrecision |= src.RelaxedPrecision;
1592 InsideMatrix |= src.InsideMatrix;
1593 }
1594
Apply(const sw::SpirvShader::DescriptorDecorations & src)1595 void SpirvShader::DescriptorDecorations::Apply(const sw::SpirvShader::DescriptorDecorations &src)
1596 {
1597 if(src.DescriptorSet >= 0)
1598 {
1599 DescriptorSet = src.DescriptorSet;
1600 }
1601
1602 if(src.Binding >= 0)
1603 {
1604 Binding = src.Binding;
1605 }
1606
1607 if(src.InputAttachmentIndex >= 0)
1608 {
1609 InputAttachmentIndex = src.InputAttachmentIndex;
1610 }
1611 }
1612
GetDecorationsForId(TypeOrObjectID id) const1613 SpirvShader::Decorations SpirvShader::GetDecorationsForId(TypeOrObjectID id) const
1614 {
1615 Decorations d;
1616 ApplyDecorationsForId(&d, id);
1617
1618 return d;
1619 }
1620
ApplyDecorationsForId(Decorations * d,TypeOrObjectID id) const1621 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1622 {
1623 auto it = decorations.find(id);
1624 if(it != decorations.end())
1625 {
1626 d->Apply(it->second);
1627 }
1628 }
1629
ApplyDecorationsForIdMember(Decorations * d,Type::ID id,uint32_t member) const1630 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1631 {
1632 auto it = memberDecorations.find(id);
1633 if(it != memberDecorations.end() && member < it->second.size())
1634 {
1635 d->Apply(it->second[member]);
1636 }
1637 }
1638
DefineResult(const InsnIterator & insn)1639 void SpirvShader::DefineResult(const InsnIterator &insn)
1640 {
1641 Type::ID typeId = insn.word(1);
1642 Object::ID resultId = insn.word(2);
1643 auto &object = defs[resultId];
1644
1645 switch(getType(typeId).opcode())
1646 {
1647 case spv::OpTypePointer:
1648 case spv::OpTypeImage:
1649 case spv::OpTypeSampledImage:
1650 case spv::OpTypeSampler:
1651 object.kind = Object::Kind::Pointer;
1652 break;
1653
1654 default:
1655 object.kind = Object::Kind::Intermediate;
1656 }
1657
1658 object.definition = insn;
1659 dbgDeclareResult(insn, resultId);
1660 }
1661
getOutOfBoundsBehavior(Object::ID pointerId,EmitState const * state) const1662 OutOfBoundsBehavior SpirvShader::getOutOfBoundsBehavior(Object::ID pointerId, EmitState const *state) const
1663 {
1664 auto it = descriptorDecorations.find(pointerId);
1665 if(it != descriptorDecorations.end())
1666 {
1667 const auto &d = it->second;
1668 if((d.DescriptorSet >= 0) && (d.Binding >= 0))
1669 {
1670 auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
1671 if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
1672 {
1673 return OutOfBoundsBehavior::UndefinedBehavior;
1674 }
1675 }
1676 }
1677
1678 auto &pointer = getObject(pointerId);
1679 auto &pointerTy = getType(pointer);
1680 switch(pointerTy.storageClass)
1681 {
1682 case spv::StorageClassUniform:
1683 case spv::StorageClassStorageBuffer:
1684 // Buffer resource access. robustBufferAccess feature applies.
1685 return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
1686 : OutOfBoundsBehavior::UndefinedBehavior;
1687
1688 case spv::StorageClassImage:
1689 // VK_EXT_image_robustness requires nullifying out-of-bounds accesses.
1690 // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
1691 return OutOfBoundsBehavior::Nullify;
1692
1693 case spv::StorageClassInput:
1694 if(executionModel == spv::ExecutionModelVertex)
1695 {
1696 // Vertex attributes follow robustBufferAccess rules.
1697 return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
1698 : OutOfBoundsBehavior::UndefinedBehavior;
1699 }
1700 // Fall through to default case.
1701 default:
1702 // TODO(b/192310780): StorageClassFunction out-of-bounds accesses are undefined behavior.
1703 // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
1704 // TODO(b/131224163): Optimize cases statically known to be within bounds.
1705 return OutOfBoundsBehavior::UndefinedValue;
1706 }
1707
1708 return OutOfBoundsBehavior::Nullify;
1709 }
1710
1711 // emit-time
1712
emitProlog(SpirvRoutine * routine) const1713 void SpirvShader::emitProlog(SpirvRoutine *routine) const
1714 {
1715 if(IsProfilingEnabled())
1716 {
1717 routine->profData = std::make_unique<SpirvProfileData>();
1718 }
1719
1720 for(auto insn : *this)
1721 {
1722 switch(insn.opcode())
1723 {
1724 case spv::OpVariable:
1725 {
1726 auto resultPointerType = getType(insn.resultTypeId());
1727 auto pointeeType = getType(resultPointerType.element);
1728
1729 if(pointeeType.componentCount > 0) // TODO: what to do about zero-slot objects?
1730 {
1731 routine->createVariable(insn.resultId(), pointeeType.componentCount);
1732 }
1733 }
1734 break;
1735
1736 case spv::OpPhi:
1737 {
1738 auto type = getType(insn.resultTypeId());
1739 routine->phis.emplace(insn.resultId(), SpirvRoutine::Variable(type.componentCount));
1740 }
1741 break;
1742
1743 case spv::OpImageSampleImplicitLod:
1744 case spv::OpImageSampleExplicitLod:
1745 case spv::OpImageSampleDrefImplicitLod:
1746 case spv::OpImageSampleDrefExplicitLod:
1747 case spv::OpImageSampleProjImplicitLod:
1748 case spv::OpImageSampleProjExplicitLod:
1749 case spv::OpImageSampleProjDrefImplicitLod:
1750 case spv::OpImageSampleProjDrefExplicitLod:
1751 case spv::OpImageFetch:
1752 case spv::OpImageGather:
1753 case spv::OpImageDrefGather:
1754 case spv::OpImageWrite:
1755 case spv::OpImageQueryLod:
1756 {
1757 // The 'inline' sampler caches must be created in the prolog to initialize the tags.
1758 uint32_t instructionPosition = insn.distanceFrom(this->begin());
1759 routine->samplerCache.emplace(instructionPosition, SpirvRoutine::SamplerCache{});
1760 }
1761 break;
1762
1763 default:
1764 // Nothing else produces interface variables, so can all be safely ignored.
1765 break;
1766 }
1767 }
1768 }
1769
emit(SpirvRoutine * routine,RValue<SIMD::Int> const & activeLaneMask,RValue<SIMD::Int> const & storesAndAtomicsMask,const vk::DescriptorSet::Bindings & descriptorSets,unsigned int multiSampleCount) const1770 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount) const
1771 {
1772 EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, multiSampleCount);
1773
1774 dbgBeginEmit(&state);
1775 defer(dbgEndEmit(&state));
1776
1777 // Emit everything up to the first label
1778 // TODO: Separate out dispatch of block from non-block instructions?
1779 for(auto insn : *this)
1780 {
1781 if(insn.opcode() == spv::OpLabel)
1782 {
1783 break;
1784 }
1785 EmitInstruction(insn, &state);
1786 }
1787
1788 // Emit all the blocks starting from entryPoint.
1789 EmitBlocks(getFunction(entryPoint).entry, &state);
1790 }
1791
EmitInstructions(InsnIterator begin,InsnIterator end,EmitState * state) const1792 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1793 {
1794 for(auto insn = begin; insn != end; insn++)
1795 {
1796 auto res = EmitInstruction(insn, state);
1797 switch(res)
1798 {
1799 case EmitResult::Continue:
1800 continue;
1801 case EmitResult::Terminator:
1802 break;
1803 default:
1804 UNREACHABLE("Unexpected EmitResult %d", int(res));
1805 break;
1806 }
1807 }
1808 }
1809
EmitInstruction(InsnIterator insn,EmitState * state) const1810 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1811 {
1812 dbgBeginEmitInstruction(insn, state);
1813 defer(dbgEndEmitInstruction(insn, state));
1814
1815 auto opcode = insn.opcode();
1816
1817 if(IsProfilingEnabled() && IsStatement(opcode))
1818 {
1819 int64_t *counter = &state->routine->profData->spvOpExecutionCount[opcode];
1820 AddAtomic(Pointer<Long>(ConstantPointer(counter)), 1);
1821 }
1822
1823 #if SPIRV_SHADER_ENABLE_DBG
1824 {
1825 auto text = spvtools::spvInstructionBinaryToText(
1826 vk::SPIRV_VERSION,
1827 insn.data(),
1828 insn.wordCount(),
1829 insns.data(),
1830 insns.size(),
1831 SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
1832 SPIRV_SHADER_DBG("{0}", text);
1833 }
1834 #endif // ENABLE_DBG_MSGS
1835
1836 switch(opcode)
1837 {
1838 case spv::OpTypeVoid:
1839 case spv::OpTypeInt:
1840 case spv::OpTypeFloat:
1841 case spv::OpTypeBool:
1842 case spv::OpTypeVector:
1843 case spv::OpTypeArray:
1844 case spv::OpTypeRuntimeArray:
1845 case spv::OpTypeMatrix:
1846 case spv::OpTypeStruct:
1847 case spv::OpTypePointer:
1848 case spv::OpTypeFunction:
1849 case spv::OpTypeImage:
1850 case spv::OpTypeSampledImage:
1851 case spv::OpTypeSampler:
1852 case spv::OpExecutionMode:
1853 case spv::OpExecutionModeId:
1854 case spv::OpMemoryModel:
1855 case spv::OpFunction:
1856 case spv::OpFunctionEnd:
1857 case spv::OpConstant:
1858 case spv::OpConstantNull:
1859 case spv::OpConstantTrue:
1860 case spv::OpConstantFalse:
1861 case spv::OpConstantComposite:
1862 case spv::OpSpecConstant:
1863 case spv::OpSpecConstantTrue:
1864 case spv::OpSpecConstantFalse:
1865 case spv::OpSpecConstantComposite:
1866 case spv::OpSpecConstantOp:
1867 case spv::OpUndef:
1868 case spv::OpExtension:
1869 case spv::OpCapability:
1870 case spv::OpEntryPoint:
1871 case spv::OpExtInstImport:
1872 case spv::OpDecorate:
1873 case spv::OpMemberDecorate:
1874 case spv::OpGroupDecorate:
1875 case spv::OpGroupMemberDecorate:
1876 case spv::OpDecorationGroup:
1877 case spv::OpDecorateId:
1878 case spv::OpDecorateString:
1879 case spv::OpMemberDecorateString:
1880 case spv::OpName:
1881 case spv::OpMemberName:
1882 case spv::OpSource:
1883 case spv::OpSourceContinued:
1884 case spv::OpSourceExtension:
1885 case spv::OpNoLine:
1886 case spv::OpModuleProcessed:
1887 case spv::OpString:
1888 // Nothing to do at emit time. These are either fully handled at analysis time,
1889 // or don't require any work at all.
1890 return EmitResult::Continue;
1891
1892 case spv::OpLine:
1893 return EmitLine(insn, state);
1894
1895 case spv::OpLabel:
1896 return EmitResult::Continue;
1897
1898 case spv::OpVariable:
1899 return EmitVariable(insn, state);
1900
1901 case spv::OpLoad:
1902 case spv::OpAtomicLoad:
1903 return EmitLoad(insn, state);
1904
1905 case spv::OpStore:
1906 case spv::OpAtomicStore:
1907 return EmitStore(insn, state);
1908
1909 case spv::OpAtomicIAdd:
1910 case spv::OpAtomicISub:
1911 case spv::OpAtomicSMin:
1912 case spv::OpAtomicSMax:
1913 case spv::OpAtomicUMin:
1914 case spv::OpAtomicUMax:
1915 case spv::OpAtomicAnd:
1916 case spv::OpAtomicOr:
1917 case spv::OpAtomicXor:
1918 case spv::OpAtomicIIncrement:
1919 case spv::OpAtomicIDecrement:
1920 case spv::OpAtomicExchange:
1921 return EmitAtomicOp(insn, state);
1922
1923 case spv::OpAtomicCompareExchange:
1924 return EmitAtomicCompareExchange(insn, state);
1925
1926 case spv::OpAccessChain:
1927 case spv::OpInBoundsAccessChain:
1928 return EmitAccessChain(insn, state);
1929
1930 case spv::OpCompositeConstruct:
1931 return EmitCompositeConstruct(insn, state);
1932
1933 case spv::OpCompositeInsert:
1934 return EmitCompositeInsert(insn, state);
1935
1936 case spv::OpCompositeExtract:
1937 return EmitCompositeExtract(insn, state);
1938
1939 case spv::OpVectorShuffle:
1940 return EmitVectorShuffle(insn, state);
1941
1942 case spv::OpVectorExtractDynamic:
1943 return EmitVectorExtractDynamic(insn, state);
1944
1945 case spv::OpVectorInsertDynamic:
1946 return EmitVectorInsertDynamic(insn, state);
1947
1948 case spv::OpVectorTimesScalar:
1949 case spv::OpMatrixTimesScalar:
1950 return EmitVectorTimesScalar(insn, state);
1951
1952 case spv::OpMatrixTimesVector:
1953 return EmitMatrixTimesVector(insn, state);
1954
1955 case spv::OpVectorTimesMatrix:
1956 return EmitVectorTimesMatrix(insn, state);
1957
1958 case spv::OpMatrixTimesMatrix:
1959 return EmitMatrixTimesMatrix(insn, state);
1960
1961 case spv::OpOuterProduct:
1962 return EmitOuterProduct(insn, state);
1963
1964 case spv::OpTranspose:
1965 return EmitTranspose(insn, state);
1966
1967 case spv::OpNot:
1968 case spv::OpBitFieldInsert:
1969 case spv::OpBitFieldSExtract:
1970 case spv::OpBitFieldUExtract:
1971 case spv::OpBitReverse:
1972 case spv::OpBitCount:
1973 case spv::OpSNegate:
1974 case spv::OpFNegate:
1975 case spv::OpLogicalNot:
1976 case spv::OpConvertFToU:
1977 case spv::OpConvertFToS:
1978 case spv::OpConvertSToF:
1979 case spv::OpConvertUToF:
1980 case spv::OpBitcast:
1981 case spv::OpIsInf:
1982 case spv::OpIsNan:
1983 case spv::OpDPdx:
1984 case spv::OpDPdxCoarse:
1985 case spv::OpDPdy:
1986 case spv::OpDPdyCoarse:
1987 case spv::OpFwidth:
1988 case spv::OpFwidthCoarse:
1989 case spv::OpDPdxFine:
1990 case spv::OpDPdyFine:
1991 case spv::OpFwidthFine:
1992 case spv::OpQuantizeToF16:
1993 return EmitUnaryOp(insn, state);
1994
1995 case spv::OpIAdd:
1996 case spv::OpISub:
1997 case spv::OpIMul:
1998 case spv::OpSDiv:
1999 case spv::OpUDiv:
2000 case spv::OpFAdd:
2001 case spv::OpFSub:
2002 case spv::OpFMul:
2003 case spv::OpFDiv:
2004 case spv::OpFMod:
2005 case spv::OpFRem:
2006 case spv::OpFOrdEqual:
2007 case spv::OpFUnordEqual:
2008 case spv::OpFOrdNotEqual:
2009 case spv::OpFUnordNotEqual:
2010 case spv::OpFOrdLessThan:
2011 case spv::OpFUnordLessThan:
2012 case spv::OpFOrdGreaterThan:
2013 case spv::OpFUnordGreaterThan:
2014 case spv::OpFOrdLessThanEqual:
2015 case spv::OpFUnordLessThanEqual:
2016 case spv::OpFOrdGreaterThanEqual:
2017 case spv::OpFUnordGreaterThanEqual:
2018 case spv::OpSMod:
2019 case spv::OpSRem:
2020 case spv::OpUMod:
2021 case spv::OpIEqual:
2022 case spv::OpINotEqual:
2023 case spv::OpUGreaterThan:
2024 case spv::OpSGreaterThan:
2025 case spv::OpUGreaterThanEqual:
2026 case spv::OpSGreaterThanEqual:
2027 case spv::OpULessThan:
2028 case spv::OpSLessThan:
2029 case spv::OpULessThanEqual:
2030 case spv::OpSLessThanEqual:
2031 case spv::OpShiftRightLogical:
2032 case spv::OpShiftRightArithmetic:
2033 case spv::OpShiftLeftLogical:
2034 case spv::OpBitwiseOr:
2035 case spv::OpBitwiseXor:
2036 case spv::OpBitwiseAnd:
2037 case spv::OpLogicalOr:
2038 case spv::OpLogicalAnd:
2039 case spv::OpLogicalEqual:
2040 case spv::OpLogicalNotEqual:
2041 case spv::OpUMulExtended:
2042 case spv::OpSMulExtended:
2043 case spv::OpIAddCarry:
2044 case spv::OpISubBorrow:
2045 return EmitBinaryOp(insn, state);
2046
2047 case spv::OpDot:
2048 case spv::OpSDot:
2049 case spv::OpUDot:
2050 case spv::OpSUDot:
2051 case spv::OpSDotAccSat:
2052 case spv::OpUDotAccSat:
2053 case spv::OpSUDotAccSat:
2054 return EmitDot(insn, state);
2055
2056 case spv::OpSelect:
2057 return EmitSelect(insn, state);
2058
2059 case spv::OpExtInst:
2060 return EmitExtendedInstruction(insn, state);
2061
2062 case spv::OpAny:
2063 return EmitAny(insn, state);
2064
2065 case spv::OpAll:
2066 return EmitAll(insn, state);
2067
2068 case spv::OpBranch:
2069 return EmitBranch(insn, state);
2070
2071 case spv::OpPhi:
2072 return EmitPhi(insn, state);
2073
2074 case spv::OpSelectionMerge:
2075 case spv::OpLoopMerge:
2076 return EmitResult::Continue;
2077
2078 case spv::OpBranchConditional:
2079 return EmitBranchConditional(insn, state);
2080
2081 case spv::OpSwitch:
2082 return EmitSwitch(insn, state);
2083
2084 case spv::OpUnreachable:
2085 return EmitUnreachable(insn, state);
2086
2087 case spv::OpReturn:
2088 return EmitReturn(insn, state);
2089
2090 case spv::OpFunctionCall:
2091 return EmitFunctionCall(insn, state);
2092
2093 case spv::OpKill:
2094 case spv::OpTerminateInvocation:
2095 return EmitTerminateInvocation(insn, state);
2096
2097 case spv::OpDemoteToHelperInvocation:
2098 return EmitDemoteToHelperInvocation(insn, state);
2099
2100 case spv::OpIsHelperInvocationEXT:
2101 return EmitIsHelperInvocation(insn, state);
2102
2103 case spv::OpImageSampleImplicitLod:
2104 case spv::OpImageSampleExplicitLod:
2105 case spv::OpImageSampleDrefImplicitLod:
2106 case spv::OpImageSampleDrefExplicitLod:
2107 case spv::OpImageSampleProjImplicitLod:
2108 case spv::OpImageSampleProjExplicitLod:
2109 case spv::OpImageSampleProjDrefImplicitLod:
2110 case spv::OpImageSampleProjDrefExplicitLod:
2111 case spv::OpImageGather:
2112 case spv::OpImageDrefGather:
2113 case spv::OpImageFetch:
2114 case spv::OpImageQueryLod:
2115 return EmitImageSample(ImageInstruction(insn, *this), state);
2116
2117 case spv::OpImageQuerySizeLod:
2118 return EmitImageQuerySizeLod(insn, state);
2119
2120 case spv::OpImageQuerySize:
2121 return EmitImageQuerySize(insn, state);
2122
2123 case spv::OpImageQueryLevels:
2124 return EmitImageQueryLevels(insn, state);
2125
2126 case spv::OpImageQuerySamples:
2127 return EmitImageQuerySamples(insn, state);
2128
2129 case spv::OpImageRead:
2130 return EmitImageRead(ImageInstruction(insn, *this), state);
2131
2132 case spv::OpImageWrite:
2133 return EmitImageWrite(ImageInstruction(insn, *this), state);
2134
2135 case spv::OpImageTexelPointer:
2136 return EmitImageTexelPointer(ImageInstruction(insn, *this), state);
2137
2138 case spv::OpSampledImage:
2139 case spv::OpImage:
2140 return EmitSampledImageCombineOrSplit(insn, state);
2141
2142 case spv::OpCopyObject:
2143 case spv::OpCopyLogical:
2144 return EmitCopyObject(insn, state);
2145
2146 case spv::OpCopyMemory:
2147 return EmitCopyMemory(insn, state);
2148
2149 case spv::OpControlBarrier:
2150 return EmitControlBarrier(insn, state);
2151
2152 case spv::OpMemoryBarrier:
2153 return EmitMemoryBarrier(insn, state);
2154
2155 case spv::OpGroupNonUniformElect:
2156 case spv::OpGroupNonUniformAll:
2157 case spv::OpGroupNonUniformAny:
2158 case spv::OpGroupNonUniformAllEqual:
2159 case spv::OpGroupNonUniformBroadcast:
2160 case spv::OpGroupNonUniformBroadcastFirst:
2161 case spv::OpGroupNonUniformBallot:
2162 case spv::OpGroupNonUniformInverseBallot:
2163 case spv::OpGroupNonUniformBallotBitExtract:
2164 case spv::OpGroupNonUniformBallotBitCount:
2165 case spv::OpGroupNonUniformBallotFindLSB:
2166 case spv::OpGroupNonUniformBallotFindMSB:
2167 case spv::OpGroupNonUniformShuffle:
2168 case spv::OpGroupNonUniformShuffleXor:
2169 case spv::OpGroupNonUniformShuffleUp:
2170 case spv::OpGroupNonUniformShuffleDown:
2171 case spv::OpGroupNonUniformIAdd:
2172 case spv::OpGroupNonUniformFAdd:
2173 case spv::OpGroupNonUniformIMul:
2174 case spv::OpGroupNonUniformFMul:
2175 case spv::OpGroupNonUniformSMin:
2176 case spv::OpGroupNonUniformUMin:
2177 case spv::OpGroupNonUniformFMin:
2178 case spv::OpGroupNonUniformSMax:
2179 case spv::OpGroupNonUniformUMax:
2180 case spv::OpGroupNonUniformFMax:
2181 case spv::OpGroupNonUniformBitwiseAnd:
2182 case spv::OpGroupNonUniformBitwiseOr:
2183 case spv::OpGroupNonUniformBitwiseXor:
2184 case spv::OpGroupNonUniformLogicalAnd:
2185 case spv::OpGroupNonUniformLogicalOr:
2186 case spv::OpGroupNonUniformLogicalXor:
2187 return EmitGroupNonUniform(insn, state);
2188
2189 case spv::OpArrayLength:
2190 return EmitArrayLength(insn, state);
2191
2192 default:
2193 UNREACHABLE("%s", OpcodeName(opcode));
2194 break;
2195 }
2196
2197 return EmitResult::Continue;
2198 }
2199
EmitAccessChain(InsnIterator insn,EmitState * state) const2200 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
2201 {
2202 Type::ID typeId = insn.word(1);
2203 Object::ID resultId = insn.word(2);
2204 Object::ID baseId = insn.word(3);
2205 auto &type = getType(typeId);
2206 ASSERT(type.componentCount == 1);
2207 ASSERT(getObject(resultId).kind == Object::Kind::Pointer);
2208
2209 if(type.storageClass == spv::StorageClassPushConstant ||
2210 type.storageClass == spv::StorageClassUniform ||
2211 type.storageClass == spv::StorageClassStorageBuffer)
2212 {
2213 auto ptr = WalkExplicitLayoutAccessChain(baseId, Span(insn, 4, insn.wordCount() - 4), state);
2214 state->createPointer(resultId, ptr);
2215 }
2216 else
2217 {
2218 auto ptr = WalkAccessChain(baseId, Span(insn, 4, insn.wordCount() - 4), state);
2219 state->createPointer(resultId, ptr);
2220 }
2221
2222 return EmitResult::Continue;
2223 }
2224
EmitCompositeConstruct(InsnIterator insn,EmitState * state) const2225 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
2226 {
2227 auto &type = getType(insn.resultTypeId());
2228 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2229 auto offset = 0u;
2230
2231 for(auto i = 0u; i < insn.wordCount() - 3; i++)
2232 {
2233 Object::ID srcObjectId = insn.word(3u + i);
2234 auto &srcObject = getObject(srcObjectId);
2235 auto &srcObjectTy = getType(srcObject);
2236 Operand srcObjectAccess(this, state, srcObjectId);
2237
2238 for(auto j = 0u; j < srcObjectTy.componentCount; j++)
2239 {
2240 dst.move(offset++, srcObjectAccess.Float(j));
2241 }
2242 }
2243
2244 return EmitResult::Continue;
2245 }
2246
EmitCompositeInsert(InsnIterator insn,EmitState * state) const2247 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
2248 {
2249 Type::ID resultTypeId = insn.word(1);
2250 auto &type = getType(resultTypeId);
2251 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2252 auto &newPartObject = getObject(insn.word(3));
2253 auto &newPartObjectTy = getType(newPartObject);
2254 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, Span(insn, 5, insn.wordCount() - 5));
2255
2256 Operand srcObjectAccess(this, state, insn.word(4));
2257 Operand newPartObjectAccess(this, state, insn.word(3));
2258
2259 // old components before
2260 for(auto i = 0u; i < firstNewComponent; i++)
2261 {
2262 dst.move(i, srcObjectAccess.Float(i));
2263 }
2264 // new part
2265 for(auto i = 0u; i < newPartObjectTy.componentCount; i++)
2266 {
2267 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
2268 }
2269 // old components after
2270 for(auto i = firstNewComponent + newPartObjectTy.componentCount; i < type.componentCount; i++)
2271 {
2272 dst.move(i, srcObjectAccess.Float(i));
2273 }
2274
2275 return EmitResult::Continue;
2276 }
2277
EmitCompositeExtract(InsnIterator insn,EmitState * state) const2278 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
2279 {
2280 auto &type = getType(insn.resultTypeId());
2281 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2282 auto &compositeObject = getObject(insn.word(3));
2283 Type::ID compositeTypeId = compositeObject.definition.word(1);
2284 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, Span(insn, 4, insn.wordCount() - 4));
2285
2286 Operand compositeObjectAccess(this, state, insn.word(3));
2287 for(auto i = 0u; i < type.componentCount; i++)
2288 {
2289 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
2290 }
2291
2292 return EmitResult::Continue;
2293 }
2294
EmitVectorShuffle(InsnIterator insn,EmitState * state) const2295 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
2296 {
2297 auto &type = getType(insn.resultTypeId());
2298 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2299
2300 // Note: number of components in result type, first half type, and second
2301 // half type are all independent.
2302 auto &firstHalfType = getObjectType(insn.word(3));
2303
2304 Operand firstHalfAccess(this, state, insn.word(3));
2305 Operand secondHalfAccess(this, state, insn.word(4));
2306
2307 for(auto i = 0u; i < type.componentCount; i++)
2308 {
2309 auto selector = insn.word(5 + i);
2310 if(selector == static_cast<uint32_t>(-1))
2311 {
2312 // Undefined value. Until we decide to do real undef values, zero is as good
2313 // a value as any
2314 dst.move(i, RValue<SIMD::Float>(0.0f));
2315 }
2316 else if(selector < firstHalfType.componentCount)
2317 {
2318 dst.move(i, firstHalfAccess.Float(selector));
2319 }
2320 else
2321 {
2322 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.componentCount));
2323 }
2324 }
2325
2326 return EmitResult::Continue;
2327 }
2328
EmitVectorExtractDynamic(InsnIterator insn,EmitState * state) const2329 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2330 {
2331 auto &type = getType(insn.resultTypeId());
2332 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2333 auto &srcType = getObjectType(insn.word(3));
2334
2335 Operand src(this, state, insn.word(3));
2336 Operand index(this, state, insn.word(4));
2337
2338 SIMD::UInt v = SIMD::UInt(0);
2339
2340 for(auto i = 0u; i < srcType.componentCount; i++)
2341 {
2342 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2343 }
2344
2345 dst.move(0, v);
2346 return EmitResult::Continue;
2347 }
2348
EmitVectorInsertDynamic(InsnIterator insn,EmitState * state) const2349 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2350 {
2351 auto &type = getType(insn.resultTypeId());
2352 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2353
2354 Operand src(this, state, insn.word(3));
2355 Operand component(this, state, insn.word(4));
2356 Operand index(this, state, insn.word(5));
2357
2358 for(auto i = 0u; i < type.componentCount; i++)
2359 {
2360 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2361 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2362 }
2363 return EmitResult::Continue;
2364 }
2365
EmitSelect(InsnIterator insn,EmitState * state) const2366 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2367 {
2368 auto &type = getType(insn.resultTypeId());
2369 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2370 auto cond = Operand(this, state, insn.word(3));
2371 auto condIsScalar = (cond.componentCount == 1);
2372 auto lhs = Operand(this, state, insn.word(4));
2373 auto rhs = Operand(this, state, insn.word(5));
2374
2375 for(auto i = 0u; i < type.componentCount; i++)
2376 {
2377 auto sel = cond.Int(condIsScalar ? 0 : i);
2378 dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse()
2379 }
2380
2381 SPIRV_SHADER_DBG("{0}: {1}", insn.word(2), dst);
2382 SPIRV_SHADER_DBG("{0}: {1}", insn.word(3), cond);
2383 SPIRV_SHADER_DBG("{0}: {1}", insn.word(4), lhs);
2384 SPIRV_SHADER_DBG("{0}: {1}", insn.word(5), rhs);
2385
2386 return EmitResult::Continue;
2387 }
2388
EmitAny(InsnIterator insn,EmitState * state) const2389 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2390 {
2391 auto &type = getType(insn.resultTypeId());
2392 ASSERT(type.componentCount == 1);
2393 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2394 auto &srcType = getObjectType(insn.word(3));
2395 auto src = Operand(this, state, insn.word(3));
2396
2397 SIMD::UInt result = src.UInt(0);
2398
2399 for(auto i = 1u; i < srcType.componentCount; i++)
2400 {
2401 result |= src.UInt(i);
2402 }
2403
2404 dst.move(0, result);
2405 return EmitResult::Continue;
2406 }
2407
EmitAll(InsnIterator insn,EmitState * state) const2408 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2409 {
2410 auto &type = getType(insn.resultTypeId());
2411 ASSERT(type.componentCount == 1);
2412 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2413 auto &srcType = getObjectType(insn.word(3));
2414 auto src = Operand(this, state, insn.word(3));
2415
2416 SIMD::UInt result = src.UInt(0);
2417
2418 for(auto i = 1u; i < srcType.componentCount; i++)
2419 {
2420 result &= src.UInt(i);
2421 }
2422
2423 dst.move(0, result);
2424 return EmitResult::Continue;
2425 }
2426
EmitAtomicOp(InsnIterator insn,EmitState * state) const2427 SpirvShader::EmitResult SpirvShader::EmitAtomicOp(InsnIterator insn, EmitState *state) const
2428 {
2429 auto &resultType = getType(Type::ID(insn.word(1)));
2430 Object::ID resultId = insn.word(2);
2431 Object::ID pointerId = insn.word(3);
2432 Object::ID semanticsId = insn.word(5);
2433 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2434 auto memoryOrder = MemoryOrder(memorySemantics);
2435 // Where no value is provided (increment/decrement) use an implicit value of 1.
2436 auto value = (insn.wordCount() == 7) ? Operand(this, state, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
2437 auto &dst = state->createIntermediate(resultId, resultType.componentCount);
2438 auto ptr = state->getPointer(pointerId);
2439 auto ptrOffsets = ptr.offsets();
2440
2441 SIMD::Int mask = state->activeLaneMask() & state->storesAndAtomicsMask();
2442
2443 if(getObject(pointerId).opcode() == spv::OpImageTexelPointer)
2444 {
2445 mask &= ptr.isInBounds(sizeof(int32_t), OutOfBoundsBehavior::Nullify);
2446 }
2447
2448 SIMD::UInt result(0);
2449 for(int j = 0; j < SIMD::Width; j++)
2450 {
2451 If(Extract(mask, j) != 0)
2452 {
2453 auto offset = Extract(ptrOffsets, j);
2454 auto laneValue = Extract(value, j);
2455 UInt v;
2456 switch(insn.opcode())
2457 {
2458 case spv::OpAtomicIAdd:
2459 case spv::OpAtomicIIncrement:
2460 v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2461 break;
2462 case spv::OpAtomicISub:
2463 case spv::OpAtomicIDecrement:
2464 v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2465 break;
2466 case spv::OpAtomicAnd:
2467 v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2468 break;
2469 case spv::OpAtomicOr:
2470 v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2471 break;
2472 case spv::OpAtomicXor:
2473 v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2474 break;
2475 case spv::OpAtomicSMin:
2476 v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
2477 break;
2478 case spv::OpAtomicSMax:
2479 v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
2480 break;
2481 case spv::OpAtomicUMin:
2482 v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2483 break;
2484 case spv::OpAtomicUMax:
2485 v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2486 break;
2487 case spv::OpAtomicExchange:
2488 v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
2489 break;
2490 default:
2491 UNREACHABLE("%s", OpcodeName(insn.opcode()));
2492 break;
2493 }
2494 result = Insert(result, v, j);
2495 }
2496 }
2497
2498 dst.move(0, result);
2499 return EmitResult::Continue;
2500 }
2501
EmitAtomicCompareExchange(InsnIterator insn,EmitState * state) const2502 SpirvShader::EmitResult SpirvShader::EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const
2503 {
2504 // Separate from EmitAtomicOp due to different instruction encoding
2505 auto &resultType = getType(Type::ID(insn.word(1)));
2506 Object::ID resultId = insn.word(2);
2507
2508 auto memorySemanticsEqual = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(5)).constantValue[0]);
2509 auto memoryOrderEqual = MemoryOrder(memorySemanticsEqual);
2510 auto memorySemanticsUnequal = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(6)).constantValue[0]);
2511 auto memoryOrderUnequal = MemoryOrder(memorySemanticsUnequal);
2512
2513 auto value = Operand(this, state, insn.word(7));
2514 auto comparator = Operand(this, state, insn.word(8));
2515 auto &dst = state->createIntermediate(resultId, resultType.componentCount);
2516 auto ptr = state->getPointer(insn.word(3));
2517 auto ptrOffsets = ptr.offsets();
2518
2519 SIMD::UInt x(0);
2520 auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
2521 for(int j = 0; j < SIMD::Width; j++)
2522 {
2523 If(Extract(mask, j) != 0)
2524 {
2525 auto offset = Extract(ptrOffsets, j);
2526 auto laneValue = Extract(value.UInt(0), j);
2527 auto laneComparator = Extract(comparator.UInt(0), j);
2528 UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
2529 x = Insert(x, v, j);
2530 }
2531 }
2532
2533 dst.move(0, x);
2534 return EmitResult::Continue;
2535 }
2536
EmitCopyObject(InsnIterator insn,EmitState * state) const2537 SpirvShader::EmitResult SpirvShader::EmitCopyObject(InsnIterator insn, EmitState *state) const
2538 {
2539 auto type = getType(insn.resultTypeId());
2540 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
2541 auto src = Operand(this, state, insn.word(3));
2542 for(uint32_t i = 0; i < type.componentCount; i++)
2543 {
2544 dst.move(i, src.Int(i));
2545 }
2546 return EmitResult::Continue;
2547 }
2548
EmitArrayLength(InsnIterator insn,EmitState * state) const2549 SpirvShader::EmitResult SpirvShader::EmitArrayLength(InsnIterator insn, EmitState *state) const
2550 {
2551 auto structPtrId = Object::ID(insn.word(3));
2552 auto arrayFieldIdx = insn.word(4);
2553
2554 auto &resultType = getType(insn.resultTypeId());
2555 ASSERT(resultType.componentCount == 1);
2556 ASSERT(resultType.definition.opcode() == spv::OpTypeInt);
2557
2558 auto &structPtrTy = getObjectType(structPtrId);
2559 auto &structTy = getType(structPtrTy.element);
2560 auto arrayId = Type::ID(structTy.definition.word(2 + arrayFieldIdx));
2561
2562 auto &result = state->createIntermediate(insn.resultId(), 1);
2563 auto structBase = GetPointerToData(structPtrId, 0, state);
2564
2565 Decorations structDecorations = {};
2566 ApplyDecorationsForIdMember(&structDecorations, structPtrTy.element, arrayFieldIdx);
2567 ASSERT(structDecorations.HasOffset);
2568
2569 auto arrayBase = structBase + structDecorations.Offset;
2570 auto arraySizeInBytes = SIMD::Int(arrayBase.limit()) - arrayBase.offsets();
2571
2572 Decorations arrayDecorations = GetDecorationsForId(arrayId);
2573 ASSERT(arrayDecorations.HasArrayStride);
2574 auto arrayLength = arraySizeInBytes / SIMD::Int(arrayDecorations.ArrayStride);
2575
2576 result.move(0, SIMD::Int(arrayLength));
2577
2578 return EmitResult::Continue;
2579 }
2580
EmitExtendedInstruction(InsnIterator insn,EmitState * state) const2581 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2582 {
2583 auto ext = getExtension(insn.word(3));
2584 switch(ext.name)
2585 {
2586 case Extension::GLSLstd450:
2587 return EmitExtGLSLstd450(insn, state);
2588 case Extension::OpenCLDebugInfo100:
2589 return EmitOpenCLDebugInfo100(insn, state);
2590 case Extension::NonSemanticInfo:
2591 // An extended set name which is prefixed with "NonSemantic." is
2592 // guaranteed to contain only non-semantic instructions and all
2593 // OpExtInst instructions referencing this set can be ignored.
2594 break;
2595 default:
2596 UNREACHABLE("Unknown Extension::Name<%d>", int(ext.name));
2597 }
2598 return EmitResult::Continue;
2599 }
2600
GetConstScalarInt(Object::ID id) const2601 uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
2602 {
2603 auto &scopeObj = getObject(id);
2604 ASSERT(scopeObj.kind == Object::Kind::Constant);
2605 ASSERT(getType(scopeObj).componentCount == 1);
2606 return scopeObj.constantValue[0];
2607 }
2608
emitEpilog(SpirvRoutine * routine) const2609 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2610 {
2611 for(auto insn : *this)
2612 {
2613 switch(insn.opcode())
2614 {
2615 case spv::OpVariable:
2616 {
2617 auto &object = getObject(insn.resultId());
2618 auto &objectTy = getType(object);
2619 if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2620 {
2621 auto &dst = routine->getVariable(insn.resultId());
2622 int offset = 0;
2623 VisitInterface(insn.resultId(),
2624 [&](Decorations const &d, AttribType type) {
2625 auto scalarSlot = d.Location << 2 | d.Component;
2626 routine->outputs[scalarSlot] = dst[offset++];
2627 });
2628 }
2629 }
2630 break;
2631 default:
2632 break;
2633 }
2634 }
2635
2636 if(IsProfilingEnabled())
2637 {
2638 profiler->RegisterShaderForProfiling(std::to_string(insns.getIdentifier()) + "_" + std::to_string((uintptr_t)routine), std::move(routine->profData));
2639 }
2640 }
2641
clearPhis(SpirvRoutine * routine) const2642 void SpirvShader::clearPhis(SpirvRoutine *routine) const
2643 {
2644 // Clear phis that are no longer used. This serves two purposes:
2645 // (1) The phi rr::Variables are destructed, preventing pointless
2646 // materialization.
2647 // (2) Frees memory that will never be used again.
2648 routine->phis.clear();
2649 }
2650
executionModelToStage(spv::ExecutionModel model)2651 VkShaderStageFlagBits SpirvShader::executionModelToStage(spv::ExecutionModel model)
2652 {
2653 switch(model)
2654 {
2655 case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT;
2656 // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2657 // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2658 // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
2659 case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
2660 case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
2661 // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan.
2662 // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV;
2663 // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV;
2664 // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV;
2665 // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
2666 // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
2667 // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
2668 // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV;
2669 // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV;
2670 default:
2671 UNSUPPORTED("ExecutionModel: %d", int(model));
2672 return VkShaderStageFlagBits(0);
2673 }
2674 }
2675
// Builds an operand from an object id by looking the object up in 'shader'
// and delegating to the (state, object) constructor.
SpirvShader::Operand::Operand(const SpirvShader *shader, const EmitState *state, SpirvShader::Object::ID objectId)
    : Operand(state, shader->getObject(objectId))
{
}
2679
Operand(const EmitState * state,const Object & object)2680 SpirvShader::Operand::Operand(const EmitState *state, const Object &object)
2681 : constant(object.kind == SpirvShader::Object::Kind::Constant ? object.constantValue.data() : nullptr)
2682 , intermediate(object.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(object.id()) : nullptr)
2683 , componentCount(intermediate ? intermediate->componentCount : object.constantValue.size())
2684 {
2685 ASSERT(intermediate || constant);
2686 }
2687
Operand(const Intermediate & value)2688 SpirvShader::Operand::Operand(const Intermediate &value)
2689 : constant(nullptr)
2690 , intermediate(&value)
2691 , componentCount(value.componentCount)
2692 {
2693 }
2694
isConstantZero() const2695 bool SpirvShader::Object::isConstantZero() const
2696 {
2697 if(kind != Kind::Constant)
2698 {
2699 return false;
2700 }
2701
2702 for(uint32_t i = 0; i < constantValue.size(); i++)
2703 {
2704 if(constantValue[i] != 0)
2705 {
2706 return false;
2707 }
2708 }
2709
2710 return true;
2711 }
2712
SpirvRoutine(vk::PipelineLayout const * pipelineLayout)2713 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout)
2714 : pipelineLayout(pipelineLayout)
2715 {
2716 }
2717
setImmutableInputBuiltins(SpirvShader const * shader)2718 void SpirvRoutine::setImmutableInputBuiltins(SpirvShader const *shader)
2719 {
2720 setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2721 ASSERT(builtin.SizeInComponents == 1);
2722 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
2723 });
2724
2725 setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2726 ASSERT(builtin.SizeInComponents == 4);
2727 value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
2728 value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2729 value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2730 value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2731 });
2732
2733 setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2734 ASSERT(builtin.SizeInComponents == 4);
2735 value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
2736 value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2737 value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2738 value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2739 });
2740
2741 setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2742 ASSERT(builtin.SizeInComponents == 4);
2743 value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
2744 value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2745 value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2746 value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2747 });
2748
2749 setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2750 ASSERT(builtin.SizeInComponents == 4);
2751 value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
2752 value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2753 value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2754 value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2755 });
2756
2757 setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2758 ASSERT(builtin.SizeInComponents == 4);
2759 value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
2760 value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2761 value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2762 value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2763 });
2764
2765 setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
2766 ASSERT(builtin.SizeInComponents == 1);
2767 // Only a single physical device is supported.
2768 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
2769 });
2770 }
2771
2772 } // namespace sw
2773