/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2024 The Khronos Group Inc.
 * Copyright (c) 2024 Valve Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Device Generated Commands EXT Utility Code
 *//*--------------------------------------------------------------------*/
#include "vktDGCUtilExt.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkDefs.hpp"
#include "vkObjUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkShaderObjectUtil.hpp"

#include <algorithm>
#include <iterator>
#include <bitset>

namespace vkt
{
namespace DGC
{

using namespace vk;

void checkDGCExtSupport(Context &context, VkShaderStageFlags stages, VkShaderStageFlags bindStagesPipeline,
                        VkShaderStageFlags bindStagesShaderObject, VkIndirectCommandsInputModeFlagsEXT inputModeFlags,
                        bool transformFeedback)
{
    context.requireDeviceFunctionality("VK_EXT_device_generated_commands");

    const auto &dgcProperties = context.getDeviceGeneratedCommandsPropertiesEXT();
    if ((dgcProperties.supportedIndirectCommandsShaderStages & stages) != stages)
        TCU_THROW(NotSupportedError, "Required DGC stages not supported");

    if ((dgcProperties.supportedIndirectCommandsShaderStagesPipelineBinding & bindStagesPipeline) != bindStagesPipeline)
        TCU_THROW(NotSupportedError, "Required DGC pipeline bind stages not supported");

    if ((dgcProperties.supportedIndirectCommandsShaderStagesShaderBinding & bindStagesShaderObject) !=
        bindStagesShaderObject)
        TCU_THROW(NotSupportedError, "Required DGC shader object bind stages not supported");

    if ((dgcProperties.supportedIndirectCommandsInputModes & inputModeFlags) != inputModeFlags)
        TCU_THROW(NotSupportedError, "Required DGC index buffer input modes not supported");

    if (transformFeedback && !dgcProperties.deviceGeneratedCommandsTransformFeedback)
        TCU_THROW(NotSupportedError, "DGC transform feedback not supported");
}

void checkDGCExtComputeSupport(Context &context, DGCComputeSupportType supportType)
{
    const auto stages                 = static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_COMPUTE_BIT);
    const auto bindStagesPipeline     = ((supportType == DGCComputeSupportType::BIND_PIPELINE) ? stages : 0u);
    const auto bindStagesShaderObject = ((supportType == DGCComputeSupportType::BIND_SHADER) ? stages : 0u);

    checkDGCExtSupport(context, stages, bindStagesPipeline, bindStagesShaderObject);
}
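
// Illustrative usage sketch (not part of the utilities above): a DGC compute test that swaps
// pipelines through the indirect execution set could verify support from its checkSupport()
// callback roughly as follows:
//
//     checkDGCExtComputeSupport(context, DGCComputeSupportType::BIND_PIPELINE);
//
// Tests binding shader objects instead would pass DGCComputeSupportType::BIND_SHADER.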

VkIndirectExecutionSetInfoEXT makeIndirectExecutionSetInfo(const VkIndirectExecutionSetPipelineInfoEXT &pipelineInfo)
{
    VkIndirectExecutionSetInfoEXT info;
    info.pPipelineInfo = &pipelineInfo;
    return info;
}

VkIndirectExecutionSetInfoEXT makeIndirectExecutionSetInfo(const VkIndirectExecutionSetShaderInfoEXT &shaderInfo)
{
    VkIndirectExecutionSetInfoEXT info;
    info.pShaderInfo = &shaderInfo;
    return info;
}

ExecutionSetManager::ExecutionSetManager(const DeviceInterface &vkd, VkDevice device,
                                         const VkIndirectExecutionSetPipelineInfoEXT &pipelineInfo)
    : m_vkd(vkd)
    , m_device(device)
    , m_executionSet()
    , m_pipelines(true)
    , m_shaderObjects(false)
    , m_pipelineWrites()
    , m_shaderWrites()
{
    const VkIndirectExecutionSetCreateInfoEXT createInfo = {
        VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_CREATE_INFO_EXT, // VkStructureType sType;
        nullptr,                                                  // void* pNext;
        VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT,        // VkIndirectExecutionSetInfoTypeEXT type;
        makeIndirectExecutionSetInfo(pipelineInfo),               // VkIndirectExecutionSetInfoEXT info;
    };

    m_executionSet = createIndirectExecutionSetEXT(vkd, device, &createInfo);
}

ExecutionSetManager::ExecutionSetManager(const DeviceInterface &vkd, VkDevice device,
                                         const VkIndirectExecutionSetShaderInfoEXT &shaderInfo)
    : m_vkd(vkd)
    , m_device(device)
    , m_executionSet()
    , m_pipelines(false)
    , m_shaderObjects(true)
    , m_pipelineWrites()
    , m_shaderWrites()
{
    const VkIndirectExecutionSetCreateInfoEXT createInfo = {
        VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_CREATE_INFO_EXT, // VkStructureType sType;
        nullptr,                                                  // void* pNext;
        VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT,   // VkIndirectExecutionSetInfoTypeEXT type;
        makeIndirectExecutionSetInfo(shaderInfo),                 // VkIndirectExecutionSetInfoEXT info;
    };

    m_executionSet = createIndirectExecutionSetEXT(vkd, device, &createInfo);
}

void ExecutionSetManager::addPipeline(uint32_t index, VkPipeline pipeline)
{
    DE_ASSERT(m_pipelines);

    // Avoid duplicating indices, which is illegal as per some VUs.
    for (const auto &pipelineWrite : m_pipelineWrites)
    {
        if (pipelineWrite.index == index)
        {
            DE_ASSERT(pipelineWrite.pipeline == pipeline);
            return;
        }
    }

    VkWriteIndirectExecutionSetPipelineEXT write = initVulkanStructure();
    write.index                                  = index;
    write.pipeline                               = pipeline;
    m_pipelineWrites.push_back(write);
}

void ExecutionSetManager::addShader(uint32_t index, VkShaderEXT shader)
{
    DE_ASSERT(m_shaderObjects);

    // Avoid duplicating indices, which is illegal as per some VUs.
    for (const auto &shaderWrite : m_shaderWrites)
    {
        if (shaderWrite.index == index)
        {
            DE_ASSERT(shaderWrite.shader == shader);
            return;
        }
    }

    VkWriteIndirectExecutionSetShaderEXT write = initVulkanStructure();
    write.index                                = index;
    write.shader                               = shader;
    m_shaderWrites.push_back(write);
}

void ExecutionSetManager::update(void)
{
    if (m_pipelines)
    {
        if (!m_pipelineWrites.empty())
        {
            m_vkd.updateIndirectExecutionSetPipelineEXT(m_device, *m_executionSet, de::sizeU32(m_pipelineWrites),
                                                        de::dataOrNull(m_pipelineWrites));
            m_pipelineWrites.clear();
        }
    }
    else if (m_shaderObjects)
    {
        if (!m_shaderWrites.empty())
        {
            m_vkd.updateIndirectExecutionSetShaderEXT(m_device, *m_executionSet, de::sizeU32(m_shaderWrites),
                                                      de::dataOrNull(m_shaderWrites));
            m_shaderWrites.clear();
        }
    }
    else
        DE_ASSERT(false);
}

VkIndirectExecutionSetEXT ExecutionSetManager::get(bool requireNoPendingWrites) const
{
    if (requireNoPendingWrites)
        assertNoPendingWrites();
    return m_executionSet.get();
}

ExecutionSetManagerPtr makeExecutionSetManagerPipeline(const DeviceInterface &vkd, VkDevice device,
                                                       VkPipeline initialPipeline, uint32_t maxPipelineCount)
{
    VkIndirectExecutionSetPipelineInfoEXT info = initVulkanStructure();
    info.initialPipeline                       = initialPipeline;
    info.maxPipelineCount                      = maxPipelineCount;

    ExecutionSetManagerPtr ptr;
    ptr.reset(new ExecutionSetManager(vkd, device, info));
    return ptr;
}

ExecutionSetManagerPtr makeExecutionSetManagerShader(const DeviceInterface &vkd, VkDevice device,
                                                     const std::vector<IESStageInfo> &stages,
                                                     const std::vector<VkPushConstantRange> &pushConstantRanges,
                                                     uint32_t maxShaderCount)
{
    VkIndirectExecutionSetShaderInfoEXT info = initVulkanStructure();

    info.pushConstantRangeCount = de::sizeU32(pushConstantRanges);
    info.pPushConstantRanges    = de::dataOrNull(pushConstantRanges);

    // Unzip information in the stages vector into individual arrays.

    std::vector<VkShaderEXT> shaders;
    shaders.reserve(stages.size());
    std::transform(begin(stages), end(stages), std::back_inserter(shaders),
                   [](const IESStageInfo &shaderInfo) { return shaderInfo.shader; });

    std::vector<VkIndirectExecutionSetShaderLayoutInfoEXT> setLayoutInfos;
    setLayoutInfos.reserve(stages.size());
    std::transform(begin(stages), end(stages), std::back_inserter(setLayoutInfos),
                   [](const IESStageInfo &shaderInfo)
                   {
                       VkIndirectExecutionSetShaderLayoutInfoEXT item = initVulkanStructure();
                       item.setLayoutCount                            = de::sizeU32(shaderInfo.setLayouts);
                       item.pSetLayouts                               = de::dataOrNull(shaderInfo.setLayouts);
                       return item;
                   });

    info.shaderCount     = de::sizeU32(stages);
    info.pInitialShaders = de::dataOrNull(shaders);
    info.maxShaderCount  = maxShaderCount;
    info.pSetLayoutInfos = de::dataOrNull(setLayoutInfos);

    ExecutionSetManagerPtr ptr;
    ptr.reset(new ExecutionSetManager(vkd, device, info));
    return ptr;
}
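
// Illustrative usage sketch (initialPipeline and otherPipeline are assumed to come from the
// calling test): the manager batches index writes and flushes them with update() before the
// execution set is used.
//
//     ExecutionSetManagerPtr iesManager =
//         makeExecutionSetManagerPipeline(vkd, device, initialPipeline, 2u /*maxPipelineCount*/);
//     iesManager->addPipeline(1u, otherPipeline); // Batched, not written yet.
//     iesManager->update();                       // Flushes pending writes to the execution set.
//     const VkIndirectExecutionSetEXT ies = iesManager->get(true /*requireNoPendingWrites*/);
//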

DGCMemReqsInfo::DGCMemReqsInfo(vk::VkIndirectExecutionSetEXT ies, vk::VkIndirectCommandsLayoutEXT cmdsLayout,
                               uint32_t maxSeqCount, uint32_t maxDrawCount, vk::VkPipeline pipeline,
                               const std::vector<vk::VkShaderEXT> *shaders)
    : m_memReqs(initVulkanStructure())
    , m_pipelineInfo(initVulkanStructure())
    , m_shadersInfo(initVulkanStructure())
    , m_shaders()
{
    // Make sure we do not pass both.
    DE_ASSERT(pipeline == VK_NULL_HANDLE || shaders == nullptr);

    if (ies == VK_NULL_HANDLE)
    {
        if (pipeline != VK_NULL_HANDLE)
        {
            m_pipelineInfo.pipeline = pipeline;
            m_memReqs.pNext         = &m_pipelineInfo;
        }
        else if (shaders != nullptr)
        {
            DE_ASSERT(!shaders->empty());
            m_shaders                 = *shaders;
            m_shadersInfo.shaderCount = de::sizeU32(m_shaders);
            m_shadersInfo.pShaders    = de::dataOrNull(m_shaders);
            m_memReqs.pNext           = &m_shadersInfo;
        }
        else
            DE_ASSERT(false);
    }

    m_memReqs.indirectExecutionSet   = ies;
    m_memReqs.indirectCommandsLayout = cmdsLayout;
    m_memReqs.maxSequenceCount       = maxSeqCount;
    m_memReqs.maxDrawCount           = maxDrawCount;
}

DGCGenCmdsInfo::DGCGenCmdsInfo(vk::VkShaderStageFlags shaderStages, vk::VkIndirectExecutionSetEXT ies,
                               vk::VkIndirectCommandsLayoutEXT indirectCommandsLayout,
                               vk::VkDeviceAddress indirectAddress, vk::VkDeviceSize indirectAddressSize,
                               vk::VkDeviceAddress preprocessAddress, vk::VkDeviceSize preprocessSize,
                               uint32_t maxSequenceCount, vk::VkDeviceAddress sequenceCountAddress,
                               uint32_t maxDrawCount, vk::VkPipeline pipeline,
                               const std::vector<vk::VkShaderEXT> *shaders)
    : m_genCmdsInfo(initVulkanStructure())
    , m_pipelineInfo(initVulkanStructure())
    , m_shadersInfo(initVulkanStructure())
    , m_shaders()
{
    // Make sure we do not pass both.
    DE_ASSERT(pipeline == VK_NULL_HANDLE || shaders == nullptr);

    if (ies == VK_NULL_HANDLE)
    {
        if (pipeline != VK_NULL_HANDLE)
        {
            m_pipelineInfo.pipeline = pipeline;
            m_genCmdsInfo.pNext     = &m_pipelineInfo;
        }
        else if (shaders != nullptr)
        {
            DE_ASSERT(!shaders->empty());
            m_shaders                 = *shaders;
            m_shadersInfo.shaderCount = de::sizeU32(m_shaders);
            m_shadersInfo.pShaders    = de::dataOrNull(m_shaders);
            m_genCmdsInfo.pNext       = &m_shadersInfo;
        }
        else
            DE_ASSERT(false);
    }

    m_genCmdsInfo.shaderStages           = shaderStages;
    m_genCmdsInfo.indirectExecutionSet   = ies;
    m_genCmdsInfo.indirectCommandsLayout = indirectCommandsLayout;
    m_genCmdsInfo.indirectAddress        = indirectAddress;
    m_genCmdsInfo.indirectAddressSize    = indirectAddressSize;
    m_genCmdsInfo.preprocessAddress      = preprocessAddress;
    m_genCmdsInfo.preprocessSize         = preprocessSize;
    m_genCmdsInfo.maxSequenceCount       = maxSequenceCount;
    m_genCmdsInfo.sequenceCountAddress   = sequenceCountAddress;
    m_genCmdsInfo.maxDrawCount           = maxDrawCount;
}

DGCGenCmdsInfo::DGCGenCmdsInfo(const DGCGenCmdsInfo &other)
    : m_genCmdsInfo(other.m_genCmdsInfo)
    , m_pipelineInfo(other.m_pipelineInfo)
    , m_shadersInfo(other.m_shadersInfo)
    , m_shaders(other.m_shaders)
{
    // Fix shaders pointer.
    if (!m_shaders.empty())
        m_shadersInfo.pShaders = de::dataOrNull(m_shaders);

    // Fix pNext pointer so it points to *our* structure.
    if (other.m_genCmdsInfo.pNext == reinterpret_cast<const void *>(&other.m_pipelineInfo))
        m_genCmdsInfo.pNext = &m_pipelineInfo;
    else if (other.m_genCmdsInfo.pNext == reinterpret_cast<const void *>(&other.m_shadersInfo))
        m_genCmdsInfo.pNext = &m_shadersInfo;

    DE_ASSERT(m_pipelineInfo.pNext == nullptr);
    DE_ASSERT(m_shadersInfo.pNext == nullptr);
}

VkMemoryRequirements getGeneratedCommandsMemoryRequirementsExt(const DeviceInterface &vkd, VkDevice device,
                                                               const VkGeneratedCommandsMemoryRequirementsInfoEXT &info)
{
    VkMemoryRequirements2 memReqs = initVulkanStructure();
    vkd.getGeneratedCommandsMemoryRequirementsEXT(device, &info, &memReqs);
    return memReqs.memoryRequirements;
}

void preprocessToExecuteBarrierExt(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer)
{
    const auto preExecutionBarrier =
        makeMemoryBarrier(VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_EXT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
    cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_EXT,
                             VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, &preExecutionBarrier);
}
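
// Illustrative ordering sketch (based on VK_EXT_device_generated_commands; genCmdsInfo, a
// VkGeneratedCommandsInfoEXT, and stateCmdBuffer are assumed locals of the calling test): when a
// test preprocesses explicitly, the barrier above sits between the preprocess and execute calls
// so preprocess writes are visible to the indirect command reads.
//
//     vkd.cmdPreprocessGeneratedCommandsEXT(cmdBuffer, &genCmdsInfo, stateCmdBuffer);
//     preprocessToExecuteBarrierExt(vkd, cmdBuffer);
//     vkd.cmdExecuteGeneratedCommandsEXT(cmdBuffer, VK_TRUE /*isPreprocessed*/, &genCmdsInfo);
//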

IndirectCommandsLayoutBuilderExt::IndirectCommandsLayoutBuilderExt(
    vk::VkIndirectCommandsLayoutUsageFlagsEXT usageFlags, vk::VkShaderStageFlags stageFlags,
    vk::VkPipelineLayout pipelineLayout, const vk::VkPipelineLayoutCreateInfo *pPipelineLayout)
    : m_layoutUsageFlags(usageFlags)
    , m_shaderStages(stageFlags)
    , m_pipelineLayout(pipelineLayout)
    , m_layoutCreateInfoPtr(pPipelineLayout)
{
}

IndirectCommandsLayoutBuilderExt::InternalToken &IndirectCommandsLayoutBuilderExt::pushBackEmptyToken(void)
{
    m_tokens.emplace_back();
    return m_tokens.back();
}

void IndirectCommandsLayoutBuilderExt::addSimpleToken(uint32_t offset, vk::VkIndirectCommandsTokenTypeEXT tokenType)
{
    auto &internalToken  = pushBackEmptyToken();
    internalToken.offset = offset;
    internalToken.type   = tokenType;
}

void IndirectCommandsLayoutBuilderExt::addPushConstantToken(uint32_t offset, const VkPushConstantRange &pcRange)
{
    auto &token  = pushBackEmptyToken();
    token.type   = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT;
    token.offset = offset;
    token.pPushConstant.reset(new VkIndirectCommandsPushConstantTokenEXT{pcRange});
}

void IndirectCommandsLayoutBuilderExt::addSequenceIndexToken(uint32_t offset, const VkPushConstantRange &pcRange)
{
    DE_ASSERT(pcRange.size == 4u); // The range size is fixed to 4 bytes by the spec.

    auto &token  = pushBackEmptyToken();
    token.type   = vk::VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT;
    token.offset = offset;
    token.pPushConstant.reset(new VkIndirectCommandsPushConstantTokenEXT{pcRange});
}

void IndirectCommandsLayoutBuilderExt::addVertexBufferToken(uint32_t offset, uint32_t bindingNumber)
{
    auto &token  = pushBackEmptyToken();
    token.type   = VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT;
    token.offset = offset;
    token.pVertexBuffer.reset(new VkIndirectCommandsVertexBufferTokenEXT{bindingNumber});
}

void IndirectCommandsLayoutBuilderExt::addIndexBufferToken(uint32_t offset,
                                                           vk::VkIndirectCommandsInputModeFlagBitsEXT mode)
{
    auto &token  = pushBackEmptyToken();
    token.type   = VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT;
    token.offset = offset;
    token.pIndexBuffer.reset(new VkIndirectCommandsIndexBufferTokenEXT{
        mode,
    });
}

void IndirectCommandsLayoutBuilderExt::addExecutionSetToken(uint32_t offset,
                                                            vk::VkIndirectExecutionSetInfoTypeEXT setType,
                                                            vk::VkShaderStageFlags stages)
{
    auto &token  = pushBackEmptyToken();
    token.type   = VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT;
    token.offset = offset;
    token.pExecutionSet.reset(new VkIndirectCommandsExecutionSetTokenEXT{
        setType,
        stages,
    });
}

void IndirectCommandsLayoutBuilderExt::addComputePipelineToken(uint32_t offset)
{
    addExecutionSetToken(offset, VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT,
                         static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_COMPUTE_BIT));
}

void IndirectCommandsLayoutBuilderExt::addComputeShaderObjectToken(uint32_t offset)
{
    addExecutionSetToken(offset, VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT,
                         static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_COMPUTE_BIT));
}

void IndirectCommandsLayoutBuilderExt::addDrawIndexedToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawIndexedCountToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawCountToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawMeshTasksCountNvToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawMeshTasksCountToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDispatchToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawMeshTasksNvToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT);
}

void IndirectCommandsLayoutBuilderExt::addDrawMeshTasksToken(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT);
}

void IndirectCommandsLayoutBuilderExt::addTraceRays2Token(uint32_t offset)
{
    addSimpleToken(offset, VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT);
}

void IndirectCommandsLayoutBuilderExt::setStreamStride(uint32_t stride)
{
    // Save the manual stride for later use.
    m_manualStride = tcu::just(stride);
}

uint32_t IndirectCommandsLayoutBuilderExt::getStreamStride(void) const
{
    if (static_cast<bool>(m_manualStride))
        return *m_manualStride;
    return getStreamRange();
}

IndirectCommandsLayoutBuilderExt::InternalToken::InternalToken()
    : type(VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_EXT)
    , offset(std::numeric_limits<uint32_t>::max())
    , pPushConstant()
    , pVertexBuffer()
    , pIndexBuffer()
    , pExecutionSet()
{
}

VkIndirectCommandsLayoutTokenEXT IndirectCommandsLayoutBuilderExt::InternalToken::asVkToken(void) const
{
    VkIndirectCommandsTokenDataEXT tokenData;

    if (type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT)
        tokenData.pPushConstant = pPushConstant.get();
    else if (type == vk::VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT)
        tokenData.pPushConstant = pPushConstant.get();
    else if (type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT)
        tokenData.pVertexBuffer = pVertexBuffer.get();
    else if (type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT)
        tokenData.pIndexBuffer = pIndexBuffer.get();
    else if (type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT)
        tokenData.pExecutionSet = pExecutionSet.get();
    else
        deMemset(&tokenData, 0, sizeof(tokenData));

    const VkIndirectCommandsLayoutTokenEXT vkToken = {
        VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_EXT, // VkStructureType sType;
        nullptr,                                              // void* pNext;
        type,                                                 // VkIndirectCommandsTokenTypeEXT type;
        tokenData,                                            // VkIndirectCommandsTokenDataEXT data;
        offset,                                               // uint32_t offset;
    };
    return vkToken;
}

namespace
{
bool isWorkProvokingToken(VkIndirectCommandsTokenTypeEXT token)
{
    bool isWorkProvoking = true;

    switch (token)
    {
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
        isWorkProvoking = false;
        break;
    default:
        break;
    }

    return isWorkProvoking;
}

uint32_t tokenDataSize(const VkIndirectCommandsLayoutTokenEXT &token)
{
    static constexpr uint32_t kU32Size  = 4u;
    static constexpr uint32_t kFlagBits = 32u;

    switch (token.type)
    {
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_EXECUTION_SET_EXT:
    {
        // When using pipelines, we only need 1 index. When using shader
        // objects, we need one index per stage indicated in the token.
        const auto indexCount =
            (token.data.pExecutionSet->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT) ?
                1u :
                static_cast<uint32_t>(std::bitset<kFlagBits>(token.data.pExecutionSet->shaderStages).count());
        return kU32Size * indexCount;
    }
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT:
        return token.data.pPushConstant->updateRange.size;
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_EXT:
        return DE_SIZEOF32(VkBindIndexBufferIndirectCommandEXT);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_EXT:
        return DE_SIZEOF32(VkBindVertexBufferIndirectCommandEXT);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_EXT:
        return DE_SIZEOF32(VkDrawIndexedIndirectCommand);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT:
        return DE_SIZEOF32(VkDrawIndirectCommand);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_COUNT_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_NV_EXT:
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_COUNT_EXT:
        // Note double indirection: the buffer specified here will contain different things for the different commands.
        return DE_SIZEOF32(VkDrawIndirectCountIndirectCommandEXT);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_EXT:
        return DE_SIZEOF32(VkDispatchIndirectCommand);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV_EXT:
        return DE_SIZEOF32(VkDrawMeshTasksIndirectCommandNV);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_EXT:
        return DE_SIZEOF32(VkDrawMeshTasksIndirectCommandEXT);
    case VK_INDIRECT_COMMANDS_TOKEN_TYPE_TRACE_RAYS2_EXT:
        return DE_SIZEOF32(VkTraceRaysIndirectCommand2KHR);
    default:
        break;
    }

    DE_ASSERT(false);
    return 0u;
}

} // namespace

uint32_t IndirectCommandsLayoutBuilderExt::getStreamRange(void) const
{
    uint32_t maxRange = 0u;
    std::for_each(begin(m_tokens), end(m_tokens),
                  [&maxRange](const InternalToken &token)
                  {
                      const auto vkToken = token.asVkToken();
                      const auto range   = vkToken.offset + tokenDataSize(vkToken);

                      if (maxRange < range)
                          maxRange = range;
                  });
    return maxRange;
}

Move<VkIndirectCommandsLayoutEXT> IndirectCommandsLayoutBuilderExt::build(const DeviceInterface &vkd, VkDevice device,
                                                                          const VkAllocationCallbacks *pAllocator) const
{
    // Make sure we have exactly one work-provoking token and it's the last one in the sequence.
    DE_ASSERT(!m_tokens.empty());
    DE_ASSERT(isWorkProvokingToken(m_tokens.back().type));

    const auto wpTokenCount = std::count_if(
        begin(m_tokens), end(m_tokens), [](const InternalToken &token) { return isWorkProvokingToken(token.type); });
    DE_UNREF(wpTokenCount); // For release builds.
    DE_ASSERT(wpTokenCount == 1u);

    // Transform internal tokens into Vulkan tokens.
    std::vector<VkIndirectCommandsLayoutTokenEXT> vkTokens;
    vkTokens.reserve(m_tokens.size());

    std::transform(begin(m_tokens), end(m_tokens), std::back_inserter(vkTokens),
                   [](const InternalToken &token) { return token.asVkToken(); });

    // A pipeline layout (handle or create info) must be provided if any token needs one.
    {
        const auto pipelineLayoutNeeded = [](const InternalToken &token)
        {
            return (token.type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_EXT ||
                    token.type == VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT);
        };
        DE_UNREF(pipelineLayoutNeeded); // For release builds.
        if (std::any_of(begin(m_tokens), end(m_tokens), pipelineLayoutNeeded))
            DE_ASSERT(m_layoutCreateInfoPtr != nullptr || m_pipelineLayout != VK_NULL_HANDLE);
    }
    // But we cannot pass both at the same time.
    DE_ASSERT((m_layoutCreateInfoPtr == nullptr) || (m_pipelineLayout == VK_NULL_HANDLE));

    // Finally, create the commands layout.
    const VkIndirectCommandsLayoutCreateInfoEXT createInfo = {
        VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_EXT, // VkStructureType sType;
        m_layoutCreateInfoPtr,                                      // const void* pNext;
        m_layoutUsageFlags,                                         // VkIndirectCommandsLayoutUsageFlagsEXT flags;
        m_shaderStages,                                             // VkShaderStageFlags shaderStages;
        getStreamStride(),                                          // uint32_t indirectStride;
        m_pipelineLayout,                                           // VkPipelineLayout pipelineLayout;
        de::sizeU32(vkTokens),                                      // uint32_t tokenCount;
        de::dataOrNull(vkTokens),                                   // const VkIndirectCommandsLayoutTokenEXT* pTokens;
    };

    return createIndirectCommandsLayoutEXT(vkd, device, &createInfo, pAllocator);
}
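
// Illustrative usage sketch (pcRange and pipelineLayout are assumed to exist in the calling test):
// build a compute sequence layout whose stream holds a push constant followed by an indirect
// dispatch; the dispatch token is the single, final work-provoking token required above.
//
//     IndirectCommandsLayoutBuilderExt builder(0u, VK_SHADER_STAGE_COMPUTE_BIT, pipelineLayout, nullptr);
//     builder.addPushConstantToken(0u, pcRange);
//     builder.addDispatchToken(pcRange.size);
//     const auto cmdsLayout = builder.build(vkd, device, nullptr);
//
// Token offsets are relative to the start of one sequence; unless setStreamStride() is called,
// the stride defaults to the computed stream range.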

PreprocessBufferExt::PreprocessBufferExt(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
                                         VkIndirectExecutionSetEXT indirectExecutionSet,
                                         VkIndirectCommandsLayoutEXT indirectCommandsLayout, uint32_t maxSequenceCount,
                                         uint32_t maxDrawCount, VkPipeline pipeline,
                                         const std::vector<vk::VkShaderEXT> *shaders, VkDeviceSize offset)
    : m_offset(offset)
    , m_buffer()
    , m_bufferAllocation()
    , m_size(0ull)
    , m_deviceAddress(0ull)
{
    const auto genCmdMemReqsInfo =
        DGCMemReqsInfo(indirectExecutionSet, indirectCommandsLayout, maxSequenceCount, maxDrawCount, pipeline, shaders);
    const auto origMemReqs = getGeneratedCommandsMemoryRequirementsExt(vkd, device, *genCmdMemReqsInfo);

    // Save the original required size. This is used by getSize() and others.
    m_size = origMemReqs.size;

    // Align the requested offset to a multiple of the required alignment.
    if (offset > 0ull)
        m_offset = de::roundUp(offset, origMemReqs.alignment);

    if (needed())
    {
        // Calculate total buffer size based on the requested size and offset.
        const VkDeviceSize preprocessSize = m_size + m_offset;

        const VkBufferUsageFlags2KHR bufferUsage =
            (VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR | VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT);

        const VkBufferUsageFlags2CreateInfoKHR usageFlags2CreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR, // VkStructureType sType;
            nullptr,                                                // const void* pNext;
            bufferUsage,                                            // VkBufferUsageFlags2KHR usage;
        };

        const VkBufferCreateInfo preprocessBufferCreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
            &usageFlags2CreateInfo,               // const void* pNext;
            0u,                                   // VkBufferCreateFlags flags;
            preprocessSize,                       // VkDeviceSize size;
            0u,                                   // VkBufferUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
            0u,                                   // uint32_t queueFamilyIndexCount;
            nullptr,                              // const uint32_t* pQueueFamilyIndices;
        };

        m_buffer = createBuffer(vkd, device, &preprocessBufferCreateInfo);

        VkMemoryRequirements bufferMemReqs;
        vkd.getBufferMemoryRequirements(device, *m_buffer, &bufferMemReqs);

        // The buffer, created for preprocessing with the corresponding usage flags, should not have a required size
        // that's smaller than the original size.
        if (bufferMemReqs.size < preprocessSize)
            TCU_FAIL("DGC memory requirements size larger than preprocess buffer requirements size");

        // The buffer alignment requirement must not be lower than the DGC alignment requirement.
        if (bufferMemReqs.alignment < origMemReqs.alignment)
            TCU_FAIL("DGC alignment requirement larger than preprocess buffer alignment requirement");

        // Find the largest alignment of the two.
        bufferMemReqs.alignment = de::lcm(bufferMemReqs.alignment, origMemReqs.alignment);

        // Find the common memory types.
        bufferMemReqs.memoryTypeBits &= origMemReqs.memoryTypeBits;

        m_bufferAllocation = allocator.allocate(bufferMemReqs, MemoryRequirement::DeviceAddress);
        VK_CHECK(
            vkd.bindBufferMemory(device, *m_buffer, m_bufferAllocation->getMemory(), m_bufferAllocation->getOffset()));

        const VkBufferDeviceAddressInfo deviceAddressInfo = {
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            nullptr,                                      // const void* pNext;
            *m_buffer,                                    // VkBuffer buffer;
        };

        // Take the offset into account when calculating the base device address.
        m_deviceAddress = vkd.getBufferDeviceAddress(device, &deviceAddressInfo) + m_offset;
    }
}
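
// Illustrative usage sketch (cmdsLayout, pipeline and maxSequenceCount are assumed to come from
// the calling test): the preprocess buffer sizes itself from the generated-commands memory
// requirements and is only backed by an actual VkBuffer when needed() reports a non-trivial
// requirement.
//
//     PreprocessBufferExt preprocessBuffer(vkd, device, allocator, VK_NULL_HANDLE /*IES*/, *cmdsLayout,
//                                          maxSequenceCount, 0u /*maxDrawCount*/, *pipeline,
//                                          nullptr /*shaders*/, 0ull /*offset*/);
//
// Its device address and size would then feed the preprocessAddress/preprocessSize arguments of
// DGCGenCmdsInfo above.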

VkDeviceAddress getBufferDeviceAddress(const DeviceInterface &vkd, VkDevice device, VkBuffer buffer)
{
    if (buffer == VK_NULL_HANDLE)
        return 0ull;

    const VkBufferDeviceAddressInfo deviceAddressInfo{
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType    sType
        nullptr,                                      // const void*        pNext
        buffer                                        // VkBuffer           buffer;
    };
    return vkd.getBufferDeviceAddress(device, &deviceAddressInfo);
}

DGCBuffer::DGCBuffer(const vk::DeviceInterface &vk, const vk::VkDevice device, vk::Allocator &allocator,
                     const vk::VkDeviceSize size, const vk::VkBufferUsageFlags extraUsageFlags,
                     const vk::MemoryRequirement extraMemReqs)
    : m_size(size)
    , m_buffer(vk, device, allocator,
               makeBufferCreateInfo(size, (extraUsageFlags | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
                                           VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)),
               (MemoryRequirement::DeviceAddress | extraMemReqs))
    , m_address(getBufferDeviceAddress(vk, device, m_buffer.get()))
{
}

DGCComputePipelineExt::DGCComputePipelineExt(const DeviceInterface &vkd, VkDevice device,
                                             VkPipelineCreateFlags2KHR pipelineFlags, VkPipelineLayout pipelineLayout,
                                             VkPipelineShaderStageCreateFlags shaderStageCreateFlags,
                                             VkShaderModule module, const VkSpecializationInfo *specializationInfo,
                                             VkPipeline basePipelineHandle, int32_t basePipelineIndex,
                                             uint32_t subgroupSize)

    : m_pipeline()
{
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroupSizeInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO, // VkStructureType sType;
        nullptr,                                                                    // void* pNext;
        subgroupSize,                                                               // uint32_t requiredSubgroupSize;
    };

    const auto shaderPNext = (subgroupSize > 0u ? &subgroupSizeInfo : nullptr);

    const VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
        shaderPNext,                                         // const void* pNext;
        shaderStageCreateFlags,                              // VkPipelineShaderStageCreateFlags flags;
        VK_SHADER_STAGE_COMPUTE_BIT,                         // VkShaderStageFlagBits stage;
        module,                                              // VkShaderModule module;
        "main",                                              // const char* pName;
        specializationInfo,                                  // const VkSpecializationInfo* pSpecializationInfo;
    };

    // Make sure the required flag is always passed.
    const auto creationFlags = (pipelineFlags | VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT);

    const VkPipelineCreateFlags2CreateInfoKHR pipelineFlagsCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR, // VkStructureType sType;
        nullptr,                                                   // const void* pNext;
        creationFlags,                                             // VkPipelineCreateFlags2KHR flags;
    };

    const VkComputePipelineCreateInfo createInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
        &pipelineFlagsCreateInfo,                       // const void* pNext;
        0u,                                             // VkPipelineCreateFlags flags;
        shaderStageCreateInfo,                          // VkPipelineShaderStageCreateInfo stage;
        pipelineLayout,                                 // VkPipelineLayout layout;
        basePipelineHandle,                             // VkPipeline basePipelineHandle;
        basePipelineIndex,                              // int32_t basePipelineIndex;
    };

    m_pipeline = createComputePipeline(vkd, device, VK_NULL_HANDLE, &createInfo);
}

VkPipeline DGCComputePipelineExt::get(void) const
{
    return *m_pipeline;
}
VkPipeline DGCComputePipelineExt::operator*(void) const
{
    return get();
}
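
// Illustrative usage sketch (compModule and pipelineLayout handles are assumed): create a compute
// pipeline that can be bound from an indirect execution set; the constructor ORs in
// VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT on top of any flags passed in.
//
//     const DGCComputePipelineExt dgcPipeline(vkd, device, 0u, *pipelineLayout, 0u, *compModule,
//                                             nullptr, VK_NULL_HANDLE, -1, 0u /*subgroupSize*/);
//     iesManager->addPipeline(0u, *dgcPipeline);
//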

DGCShaderExt::DGCShaderExt(const vk::DeviceInterface &vkd, vk::VkDevice device, vk::VkShaderStageFlagBits stage,
                           vk::VkShaderCreateFlagsEXT shaderFlags, const vk::ProgramBinary &shaderBinary,
                           const std::vector<vk::VkDescriptorSetLayout> &setLayouts,
                           const std::vector<vk::VkPushConstantRange> &pushConstantRanges, bool tessellationFeature,
                           bool geometryFeature, const vk::VkSpecializationInfo *specializationInfo, const void *pNext)

    : m_shader()
{
    init(vkd, device, stage, shaderFlags, shaderBinary, setLayouts, pushConstantRanges, tessellationFeature,
         geometryFeature, specializationInfo, pNext);
}

DGCShaderExt::DGCShaderExt(void) : m_shader()
{
}

void DGCShaderExt::init(const vk::DeviceInterface &vkd, vk::VkDevice device, vk::VkShaderStageFlagBits stage,
                        vk::VkShaderCreateFlagsEXT shaderFlags, const vk::ProgramBinary &shaderBinary,
                        const std::vector<vk::VkDescriptorSetLayout> &setLayouts,
                        const std::vector<vk::VkPushConstantRange> &pushConstantRanges, bool tessellationFeature,
                        bool geometryFeature, const vk::VkSpecializationInfo *specializationInfo, const void *pNext)
{
    if (shaderBinary.getFormat() != PROGRAM_FORMAT_SPIRV)
        TCU_THROW(InternalError, "Program format not supported");

    // Make sure not to forget the mandatory flag.
    const auto createFlags = (shaderFlags | VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT);

    VkShaderStageFlags nextStage = 0u;
    switch (stage)
    {
    case VK_SHADER_STAGE_VERTEX_BIT:
        if (tessellationFeature)
            nextStage |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
        if (geometryFeature)
            nextStage |= VK_SHADER_STAGE_GEOMETRY_BIT;
        nextStage |= VK_SHADER_STAGE_FRAGMENT_BIT;
        break;
    case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
        DE_ASSERT(tessellationFeature);
        nextStage |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
        break;
    case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
        DE_ASSERT(tessellationFeature);
        if (geometryFeature)
            nextStage |= VK_SHADER_STAGE_GEOMETRY_BIT;
        nextStage |= VK_SHADER_STAGE_FRAGMENT_BIT;
        break;
    case VK_SHADER_STAGE_GEOMETRY_BIT:
        DE_ASSERT(geometryFeature);
        nextStage |= VK_SHADER_STAGE_FRAGMENT_BIT;
        break;
    case VK_SHADER_STAGE_TASK_BIT_EXT:
        nextStage |= VK_SHADER_STAGE_MESH_BIT_EXT;
        break;
    case VK_SHADER_STAGE_MESH_BIT_EXT:
        nextStage |= VK_SHADER_STAGE_FRAGMENT_BIT;
        break;
    default:
        break;
    }

    const VkShaderCreateInfoEXT shaderCreateInfo = {
        VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, // VkStructureType sType;
        pNext,                                    // const void* pNext;
        createFlags,                              // VkShaderCreateFlagsEXT flags;
        stage,                                    // VkShaderStageFlagBits stage;
        nextStage,                                // VkShaderStageFlags nextStage;
        VK_SHADER_CODE_TYPE_SPIRV_EXT,            // VkShaderCodeTypeEXT codeType;
        shaderBinary.getSize(),                   // size_t codeSize;
        shaderBinary.getBinary(),                 // const void* pCode;
        "main",                                   // const char* pName;
        de::sizeU32(setLayouts),                  // uint32_t setLayoutCount;
        de::dataOrNull(setLayouts),               // const VkDescriptorSetLayout* pSetLayouts;
        de::sizeU32(pushConstantRanges),          // uint32_t pushConstantRangeCount;
        de::dataOrNull(pushConstantRanges),       // const VkPushConstantRange* pPushConstantRanges;
        specializationInfo,                       // const VkSpecializationInfo* pSpecializationInfo;
    };

    shaderBinary.setUsed();
    m_shader = createShader(vkd, device, shaderCreateInfo);
}

DGCComputeShaderExt::DGCComputeShaderExt(const vk::DeviceInterface &vkd, vk::VkDevice device,
                                         vk::VkShaderCreateFlagsEXT shaderFlags, const vk::ProgramBinary &shaderBinary,
                                         const std::vector<vk::VkDescriptorSetLayout> &setLayouts,
                                         const std::vector<vk::VkPushConstantRange> &pushConstantRanges,
                                         const vk::VkSpecializationInfo *specializationInfo, uint32_t subgroupSize)

    : DGCShaderExt()
{
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroupSizeInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO, // VkStructureType sType;
        nullptr,                                                                    // void* pNext;
        subgroupSize,                                                               // uint32_t requiredSubgroupSize;
    };

    const auto pNext = (subgroupSize > 0u ? &subgroupSizeInfo : nullptr);

    init(vkd, device, VK_SHADER_STAGE_COMPUTE_BIT, shaderFlags, shaderBinary, setLayouts, pushConstantRanges, false,
         false, specializationInfo, pNext);
}
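
// Illustrative usage sketch (the "comp" binary name and the setLayouts/pcRanges vectors are
// assumptions of the calling test): create an indirect-bindable compute shader object from the
// test's SPIR-V collection; a zero subgroupSize skips the required-subgroup-size structure. The
// resulting shader would typically be registered in a shader-object execution set through
// ExecutionSetManager::addShader().
//
//     const auto &binary = context.getBinaryCollection().get("comp");
//     const DGCComputeShaderExt dgcShader(vkd, device, 0u, binary, setLayouts, pcRanges, nullptr, 0u);
//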
959 
960 namespace
961 {
962 
makeVertexInputBindingDescription2(const VkVertexInputBindingDescription & description)963 VkVertexInputBindingDescription2EXT makeVertexInputBindingDescription2(
964     const VkVertexInputBindingDescription &description)
965 {
966     const VkVertexInputBindingDescription2EXT desc2 = {
967         VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, // VkStructureType sType;
968         nullptr,                                                  // void* pNext;
969         description.binding,                                      // uint32_t binding;
970         description.stride,                                       // uint32_t stride;
971         description.inputRate,                                    // VkVertexInputRate inputRate;
972         1u,                                                       // uint32_t divisor;
973     };
974     return desc2;
975 }
976 
makeVertexInputAttributeDescription2(const VkVertexInputAttributeDescription & description)977 VkVertexInputAttributeDescription2EXT makeVertexInputAttributeDescription2(
978     const VkVertexInputAttributeDescription &description)
979 {
980     const VkVertexInputAttributeDescription2EXT desc2 = {
981         VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, // VkStructureType sType;
982         nullptr,                                                    // void* pNext;
983         description.location,                                       // uint32_t location;
984         description.binding,                                        // uint32_t binding;
985         description.format,                                         // VkFormat format;
986         description.offset,                                         // uint32_t offset;
987     };
988     return desc2;
989 }
990 
991 } // anonymous namespace
992 
getDeviceCreationExtensions(Context & context)993 std::vector<std::string> getDeviceCreationExtensions(Context &context)
994 {
995     const auto &extList = context.getDeviceCreationExtensions();
996     std::vector<std::string> ret(begin(extList), end(extList));
997     return ret;
998 }
999 
bindShaderObjectState(const DeviceInterface & vkd,const std::vector<std::string> & deviceExtensions,const VkCommandBuffer cmdBuffer,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const uint32_t patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo)1000 void bindShaderObjectState(const DeviceInterface &vkd, const std::vector<std::string> &deviceExtensions,
1001                            const VkCommandBuffer cmdBuffer, const std::vector<VkViewport> &viewports,
1002                            const std::vector<VkRect2D> &scissors, const VkPrimitiveTopology topology,
1003                            const uint32_t patchControlPoints,
1004                            const VkPipelineVertexInputStateCreateInfo *vertexInputStateCreateInfo,
1005                            const VkPipelineRasterizationStateCreateInfo *rasterizationStateCreateInfo,
1006                            const VkPipelineMultisampleStateCreateInfo *multisampleStateCreateInfo,
1007                            const VkPipelineDepthStencilStateCreateInfo *depthStencilStateCreateInfo,
1008                            const VkPipelineColorBlendStateCreateInfo *colorBlendStateCreateInfo)
1009 {
1010     if (vertexInputStateCreateInfo)
1011     {
1012         // This is not used with mesh shaders.
1013         const auto &srcBindingDescs = vertexInputStateCreateInfo->pVertexBindingDescriptions;
1014         const auto &srcBindingCount = vertexInputStateCreateInfo->vertexBindingDescriptionCount;
1015 
1016         const auto &srcAttributeDescs = vertexInputStateCreateInfo->pVertexAttributeDescriptions;
1017         const auto &srcAttributeCount = vertexInputStateCreateInfo->vertexAttributeDescriptionCount;
1018 
1019         std::vector<VkVertexInputBindingDescription2EXT> bindingDescriptions;
1020         bindingDescriptions.reserve(srcBindingCount);
1021         std::transform(srcBindingDescs, srcBindingDescs + srcBindingCount, std::back_inserter(bindingDescriptions),
1022                        [](const VkVertexInputBindingDescription &description)
1023                        { return makeVertexInputBindingDescription2(description); });
1024 
1025         std::vector<VkVertexInputAttributeDescription2EXT> attributeDescriptions;
1026         attributeDescriptions.reserve(srcAttributeCount);
1027         std::transform(srcAttributeDescs, srcAttributeDescs + srcAttributeCount,
1028                        std::back_inserter(attributeDescriptions),
1029                        [](const VkVertexInputAttributeDescription &description)
1030                        { return makeVertexInputAttributeDescription2(description); });
1031 
1032         vkd.cmdSetVertexInputEXT(cmdBuffer, de::sizeU32(bindingDescriptions), de::dataOrNull(bindingDescriptions),
1033                                  de::sizeU32(attributeDescriptions), de::dataOrNull(attributeDescriptions));
1034     }
1035 
1036     if (vertexInputStateCreateInfo)
1037     {
1038         // Primitive topology, primitive restart and tessellation state are likewise not used with mesh shaders.
1039         vkd.cmdSetPrimitiveTopology(cmdBuffer, topology);
1040         vkd.cmdSetPrimitiveRestartEnable(cmdBuffer, VK_FALSE);
1041 
1042         if (patchControlPoints > 0u)
1043         {
1044             vkd.cmdSetPatchControlPointsEXT(cmdBuffer, patchControlPoints);
1045             vkd.cmdSetTessellationDomainOriginEXT(cmdBuffer, VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT);
1046         }
1047     }
1048 
1049     {
1050         vkd.cmdSetViewportWithCount(cmdBuffer, de::sizeU32(viewports), de::dataOrNull(viewports));
1051         vkd.cmdSetScissorWithCount(cmdBuffer, de::sizeU32(scissors), de::dataOrNull(scissors));
1052     }
1053 
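    // Rasterization state: without a create info, mimic classic pipeline defaults
    // (fill mode, no culling, counter-clockwise front face, no depth bias, line width 1.0).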
1054     {
1055         const auto depthClampEnable =
1056             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->depthClampEnable : VK_FALSE);
1057         const auto rasterizerDiscardEnable =
1058             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->rasterizerDiscardEnable : VK_FALSE);
1059         const auto polygonMode =
1060             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->polygonMode : VK_POLYGON_MODE_FILL);
1061         const auto cullMode = (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->cullMode :
1062                                                               static_cast<VkCullModeFlags>(VK_CULL_MODE_NONE));
1063         const auto frontFace =
1064             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->frontFace : VK_FRONT_FACE_COUNTER_CLOCKWISE);
1065         const auto depthBiasEnable =
1066             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->depthBiasEnable : VK_FALSE);
1067         const auto depthBiasConstantFactor =
1068             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->depthBiasConstantFactor : 0.0f);
1069         const auto depthBiasClamp =
1070             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->depthBiasClamp : 0.0f);
1071         const auto depthBiasSlopeFactor =
1072             (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->depthBiasSlopeFactor : 0.0f);
1073         const auto lineWidth = (rasterizationStateCreateInfo ? rasterizationStateCreateInfo->lineWidth : 1.0f);
1074 
1075         vkd.cmdSetDepthClampEnableEXT(cmdBuffer, depthClampEnable);
1076         vkd.cmdSetRasterizerDiscardEnable(cmdBuffer, rasterizerDiscardEnable);
1077         vkd.cmdSetPolygonModeEXT(cmdBuffer, polygonMode);
1078         vkd.cmdSetCullMode(cmdBuffer, cullMode);
1079         vkd.cmdSetFrontFace(cmdBuffer, frontFace);
1080         vkd.cmdSetDepthBiasEnable(cmdBuffer, depthBiasEnable);
1081         vkd.cmdSetDepthBias(cmdBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
1082         vkd.cmdSetLineWidth(cmdBuffer, lineWidth);
1083     }
1084 
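    // Multisample state: default to single-sample rasterization with a full sample mask.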
1085     {
1086         const auto rasterizationSamples =
1087             (multisampleStateCreateInfo ? multisampleStateCreateInfo->rasterizationSamples : VK_SAMPLE_COUNT_1_BIT);
1088         const auto defaultSampleMask = 0xFFFFFFFFu;
1089         const auto pSampleMask =
1090             (multisampleStateCreateInfo ? multisampleStateCreateInfo->pSampleMask : &defaultSampleMask);
1091         const auto alphaToCoverageEnable =
1092             (multisampleStateCreateInfo ? multisampleStateCreateInfo->alphaToCoverageEnable : VK_FALSE);
1093         const auto alphaToOneEnable =
1094             (multisampleStateCreateInfo ? multisampleStateCreateInfo->alphaToOneEnable : VK_FALSE);
1095 
1096         vkd.cmdSetRasterizationSamplesEXT(cmdBuffer, rasterizationSamples);
1097         vkd.cmdSetSampleMaskEXT(cmdBuffer, rasterizationSamples, pSampleMask);
1098         vkd.cmdSetAlphaToCoverageEnableEXT(cmdBuffer, alphaToCoverageEnable);
1099         vkd.cmdSetAlphaToOneEnableEXT(cmdBuffer, alphaToOneEnable);
1100     }
1101 
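    // Depth/stencil state: all tests disabled by default, with a keep/never stencil op
    // and a zero-sized depth bounds range.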
1102     {
1103         const auto defaultStencilOp = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP,
1104                                                          VK_COMPARE_OP_NEVER, 0u, 0u, 0u);
1105 
1106         const auto depthTestEnable =
1107             (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->depthTestEnable : VK_FALSE);
1108         const auto depthWriteEnable =
1109             (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->depthWriteEnable : VK_FALSE);
1110         const auto depthCompareOp =
1111             (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->depthCompareOp : VK_COMPARE_OP_NEVER);
1112         const auto depthBoundsTestEnable =
1113             (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->depthBoundsTestEnable : VK_FALSE);
1114         const auto stencilTestEnable =
1115             (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->stencilTestEnable : VK_FALSE);
1116         const auto stencilFront = (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->front : defaultStencilOp);
1117         const auto stencilBack  = (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->back : defaultStencilOp);
1118         const auto minDepthBounds = (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->minDepthBounds : 0.0f);
1119         const auto maxDepthBounds = (depthStencilStateCreateInfo ? depthStencilStateCreateInfo->maxDepthBounds : 0.0f);
1120 
1121         vkd.cmdSetDepthTestEnable(cmdBuffer, depthTestEnable);
1122         vkd.cmdSetDepthWriteEnable(cmdBuffer, depthWriteEnable);
1123         vkd.cmdSetDepthCompareOp(cmdBuffer, depthCompareOp);
1124         vkd.cmdSetDepthBoundsTestEnable(cmdBuffer, depthBoundsTestEnable);
1125         vkd.cmdSetStencilTestEnable(cmdBuffer, stencilTestEnable);
1126 
1127         vkd.cmdSetStencilOp(cmdBuffer, VK_STENCIL_FACE_FRONT_BIT, stencilFront.failOp, stencilFront.passOp,
1128                             stencilFront.depthFailOp, stencilFront.compareOp);
1129         vkd.cmdSetStencilCompareMask(cmdBuffer, VK_STENCIL_FACE_FRONT_BIT, stencilFront.compareMask);
1130         vkd.cmdSetStencilWriteMask(cmdBuffer, VK_STENCIL_FACE_FRONT_BIT, stencilFront.writeMask);
1131         vkd.cmdSetStencilReference(cmdBuffer, VK_STENCIL_FACE_FRONT_BIT, stencilFront.reference);
1132 
1133         vkd.cmdSetStencilOp(cmdBuffer, VK_STENCIL_FACE_BACK_BIT, stencilBack.failOp, stencilBack.passOp,
1134                             stencilBack.depthFailOp, stencilBack.compareOp);
1135         vkd.cmdSetStencilCompareMask(cmdBuffer, VK_STENCIL_FACE_BACK_BIT, stencilBack.compareMask);
1136         vkd.cmdSetStencilWriteMask(cmdBuffer, VK_STENCIL_FACE_BACK_BIT, stencilBack.writeMask);
1137         vkd.cmdSetStencilReference(cmdBuffer, VK_STENCIL_FACE_BACK_BIT, stencilBack.reference);
1138 
1139         vkd.cmdSetDepthBounds(cmdBuffer, minDepthBounds, maxDepthBounds);
1140     }
1141 
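    // Color blend state: logic op and blending disabled by default. With a create info,
    // the per-attachment write masks and blend equations are translated into the
    // corresponding dynamic state calls.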
1142     {
1143         const auto logicOpEnable = (colorBlendStateCreateInfo ? colorBlendStateCreateInfo->logicOpEnable : VK_FALSE);
1144         const auto logicOp       = (colorBlendStateCreateInfo ? colorBlendStateCreateInfo->logicOp : VK_LOGIC_OP_CLEAR);
1145 
1146         vkd.cmdSetLogicOpEnableEXT(cmdBuffer, logicOpEnable);
1147         vkd.cmdSetLogicOpEXT(cmdBuffer, logicOp);
1148 
1149         std::vector<VkBool32> colorWriteEnables;
1150         std::vector<VkColorComponentFlags> colorWriteMasks;
1151         std::vector<VkBool32> colorBlendEnables;
1152         std::vector<VkColorBlendEquationEXT> colorBlendEquations;
1153 
1154         if (!colorBlendStateCreateInfo)
1155         {
1156             const auto defaultWriteMask = (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
1157                                            VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
1158             const auto defaultBlendEq =
1159                 makeColorBlendEquationEXT(VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
1160                                           VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD);
1161 
1162             colorWriteEnables.push_back(VK_TRUE);
1163             colorWriteMasks.push_back(defaultWriteMask);
1164             colorBlendEnables.push_back(VK_FALSE);
1165             colorBlendEquations.push_back(defaultBlendEq);
1166         }
1167         else
1168         {
1169             const auto &attCount     = colorBlendStateCreateInfo->attachmentCount;
1170             const auto &pAttachments = colorBlendStateCreateInfo->pAttachments;
1171 
1172             colorWriteEnables.reserve(attCount);
1173             colorWriteMasks.reserve(attCount);
1174             colorBlendEnables.reserve(attCount);
1175             colorBlendEquations.reserve(attCount);
1176 
1177             colorWriteEnables = std::vector<VkBool32>(attCount, VK_TRUE);
1178 
1179             std::transform(pAttachments, pAttachments + attCount, std::back_inserter(colorWriteMasks),
1180                            [](const VkPipelineColorBlendAttachmentState &attState) { return attState.colorWriteMask; });
1181 
1182             std::transform(pAttachments, pAttachments + attCount, std::back_inserter(colorBlendEnables),
1183                            [](const VkPipelineColorBlendAttachmentState &attState) { return attState.blendEnable; });
1184 
1185             std::transform(pAttachments, pAttachments + attCount, std::back_inserter(colorBlendEquations),
1186                            [](const VkPipelineColorBlendAttachmentState &attState)
1187                            {
1188                                return makeColorBlendEquationEXT(
1189                                    attState.srcColorBlendFactor, attState.dstColorBlendFactor, attState.colorBlendOp,
1190                                    attState.srcAlphaBlendFactor, attState.dstAlphaBlendFactor, attState.alphaBlendOp);
1191                            });
1192         }
1193 
1194         vkd.cmdSetColorWriteEnableEXT(cmdBuffer, de::sizeU32(colorWriteEnables), de::dataOrNull(colorWriteEnables));
1195         vkd.cmdSetColorWriteMaskEXT(cmdBuffer, 0u, de::sizeU32(colorWriteMasks), de::dataOrNull(colorWriteMasks));
1196         vkd.cmdSetColorBlendEnableEXT(cmdBuffer, 0u, de::sizeU32(colorBlendEnables), de::dataOrNull(colorBlendEnables));
1197         vkd.cmdSetColorBlendEquationEXT(cmdBuffer, 0u, de::sizeU32(colorBlendEquations),
1198                                         de::dataOrNull(colorBlendEquations));
1199     }
1200 
1201     // Extra states with default values depending on enabled extensions.
1202     const auto extraDynStates = getShaderObjectDynamicStatesFromExtensions(deviceExtensions);
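    // Give each extra dynamic state a neutral default. States whose controlling enable
    // is left false above (or that were already covered earlier) intentionally get no value.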
1203     for (const auto dynState : extraDynStates)
1204     {
1205         switch (dynState)
1206         {
1207         case VK_DYNAMIC_STATE_RASTERIZATION_STREAM_EXT:
1208             vkd.cmdSetRasterizationStreamEXT(cmdBuffer, 0u);
1209             break;
1210         case VK_DYNAMIC_STATE_COLOR_BLEND_ADVANCED_EXT:
1211             break;
1212         case VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT:
1213             vkd.cmdSetConservativeRasterizationModeEXT(cmdBuffer, VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
1214             break;
1215         case VK_DYNAMIC_STATE_COVERAGE_MODULATION_MODE_NV:
1216             vkd.cmdSetCoverageModulationModeNV(cmdBuffer, VK_COVERAGE_MODULATION_MODE_NONE_NV);
1217             break;
1218         case VK_DYNAMIC_STATE_COVERAGE_MODULATION_TABLE_ENABLE_NV:
1219             vkd.cmdSetCoverageModulationTableEnableNV(cmdBuffer, VK_FALSE);
1220             break;
1221         case VK_DYNAMIC_STATE_COVERAGE_MODULATION_TABLE_NV:
1222             // CoverageModulationTableEnable is false, so we can skip this.
1223             //vkd.cmdSetCoverageModulationTableNV(cmdBuffer, 0u, nullptr);
1224             break;
1225         case VK_DYNAMIC_STATE_COVERAGE_REDUCTION_MODE_NV:
1226             vkd.cmdSetCoverageReductionModeNV(cmdBuffer, VK_COVERAGE_REDUCTION_MODE_MERGE_NV);
1227             break;
1228         case VK_DYNAMIC_STATE_COVERAGE_TO_COLOR_ENABLE_NV:
1229             vkd.cmdSetCoverageToColorEnableNV(cmdBuffer, VK_FALSE);
1230             break;
1231         case VK_DYNAMIC_STATE_COVERAGE_TO_COLOR_LOCATION_NV:
1232             vkd.cmdSetCoverageToColorLocationNV(cmdBuffer, 0u);
1233             break;
1234         case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT:
1235             vkd.cmdSetDepthClipEnableEXT(cmdBuffer, VK_FALSE);
1236             break;
1237         case VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT:
1238             vkd.cmdSetDepthClipNegativeOneToOneEXT(cmdBuffer, VK_FALSE);
1239             break;
1240         case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
1241             break;
1242         case VK_DYNAMIC_STATE_EXTRA_PRIMITIVE_OVERESTIMATION_SIZE_EXT:
1243             vkd.cmdSetExtraPrimitiveOverestimationSizeEXT(cmdBuffer, 0.0f);
1244             break;
1245         case VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT:
1246             vkd.cmdSetLineRasterizationModeEXT(cmdBuffer, VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR);
1247             break;
1248         case VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT:
1249             vkd.cmdSetLineStippleEnableEXT(cmdBuffer, VK_FALSE);
1250             break;
1251         case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
1252             break;
1253         case VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT:
1254             vkd.cmdSetProvokingVertexModeEXT(cmdBuffer, VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
1255             break;
1256         case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
1257         {
1258             const auto fsrSize                                      = makeExtent2D(1u, 1u);
1259             const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
1260                                                                        VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR};
1261             vkd.cmdSetFragmentShadingRateKHR(cmdBuffer, &fsrSize, combinerOps);
1262         }
1263         break;
1264         case VK_DYNAMIC_STATE_REPRESENTATIVE_FRAGMENT_TEST_ENABLE_NV:
1265             vkd.cmdSetRepresentativeFragmentTestEnableNV(cmdBuffer, VK_FALSE);
1266             break;
1267         case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT:
1268             vkd.cmdSetSampleLocationsEnableEXT(cmdBuffer, VK_FALSE);
1269             break;
1270         case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
1271             break;
1272         case VK_DYNAMIC_STATE_VIEWPORT_SWIZZLE_NV:
1273         {
1274             const VkViewportSwizzleNV defaultSwizzle{
1275                 VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV,
1276                 VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Y_NV,
1277                 VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Z_NV,
1278                 VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_W_NV,
1279             };
1280             const std::vector<vk::VkViewportSwizzleNV> idSwizzles(viewports.size(), defaultSwizzle);
1281             vkd.cmdSetViewportSwizzleNV(cmdBuffer, 0u, de::sizeU32(idSwizzles), de::dataOrNull(idSwizzles));
1282         }
1283         break;
1284         case VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_ENABLE_NV:
1285             vkd.cmdSetViewportWScalingEnableNV(cmdBuffer, VK_FALSE);
1286             break;
1287         case VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_NV:
1288             break;
1289         case VK_DYNAMIC_STATE_EXCLUSIVE_SCISSOR_ENABLE_NV:
1290         {
1291             const VkBool32 enable = VK_FALSE;
1292             vkd.cmdSetExclusiveScissorEnableNV(cmdBuffer, 0u, 1u, &enable);
1293         }
1294         break;
1295         case VK_DYNAMIC_STATE_EXCLUSIVE_SCISSOR_NV:
1296             break;
1297         case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT:
1298             vkd.cmdSetDiscardRectangleEnableEXT(cmdBuffer, VK_FALSE);
1299             break;
1300         case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
1301             break;
1302         case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT:
1303             break;
1304         case VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT:
1305             vkd.cmdSetAttachmentFeedbackLoopEnableEXT(cmdBuffer, 0u);
1306             break;
1307         case VK_DYNAMIC_STATE_DEPTH_CLAMP_RANGE_EXT:
1308             vkd.cmdSetDepthClampRangeEXT(cmdBuffer, VK_DEPTH_CLAMP_MODE_VIEWPORT_RANGE_EXT, nullptr);
1309             break;
1310         default:
1311             DE_ASSERT(false);
1312             break;
1313         }
1314     }
1315 }
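// Illustrative usage sketch (not called from this file; "vkd", "deviceExtensions", "cmdBuffer",
// "viewports", "scissors" and "vertexInput" are assumed to exist in the caller). Passing nullptr
// for the optional create infos selects the defaults above:
//
//     bindShaderObjectState(vkd, deviceExtensions, cmdBuffer, viewports, scissors,
//                           VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0u /*patchControlPoints*/,
//                           &vertexInput, nullptr, nullptr, nullptr, nullptr);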
1316 
1317 namespace
1318 {
1319 
1320 int32_t toDX12Format(VkIndexType indexType)
1321 {
1322     // From https://learn.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format
1323     // DXGI_FORMAT_R32_UINT = 42,
1324     // DXGI_FORMAT_R16_UINT = 57,
1325     static constexpr int32_t kDXR32Uint = 42;
1326     static constexpr int32_t kDXR16Uint = 57;
1327 
1328     switch (indexType)
1329     {
1330     case VK_INDEX_TYPE_UINT32:
1331         return kDXR32Uint;
1332     case VK_INDEX_TYPE_UINT16:
1333         return kDXR16Uint;
1334     default:
1335         break;
1336     }
1337 
1338     DE_ASSERT(false);
1339     return 0;
1340 }
1341 
1342 } // anonymous namespace
1343 
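// Packs a Vulkan device address, size and index type into a D3D12-style index buffer view
// (64-bit GPU address, size in bytes, DXGI_FORMAT value), presumably matching the layout
// expected by the DXGI index buffer input mode; see toDX12Format() above for the translation.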
1344 IndexBufferViewD3D12::IndexBufferViewD3D12(vk::VkDeviceAddress address_, uint32_t size_, vk::VkIndexType indexType_)
1345     : bufferAddress(static_cast<uint64_t>(address_))
1346     , size(size_)
1347     , indexType(toDX12Format(indexType_))
1348 {
1349 }
1350 
1351 void submitAndWaitWithPreprocess(const DeviceInterface &vkd, VkDevice device, VkQueue queue, VkCommandBuffer cmdBuffer,
1352                                  VkCommandBuffer preprocessCmdBuffer)
1353 {
1354     const bool hasPreprocess = (preprocessCmdBuffer != VK_NULL_HANDLE);
1355 
1356     std::vector<VkSubmitInfo> submitInfos;
1357     submitInfos.reserve(2u); // (Optional) Preprocess and execute.
1358 
1359     Move<VkSemaphore> preprocessSemaphore;
1360     std::vector<VkSemaphore> signalWaitSemaphores;
1361     std::vector<VkPipelineStageFlags> waitStages;
1362 
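    // With a preprocess command buffer, submit it first and signal a semaphore that the
    // execution submission will wait on at the draw-indirect stage.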
1363     if (hasPreprocess)
1364     {
1365         preprocessSemaphore = createSemaphore(vkd, device);
1366         signalWaitSemaphores.push_back(*preprocessSemaphore);
1367         waitStages.push_back(VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
1368 
1369         submitInfos.push_back(VkSubmitInfo{
1370             VK_STRUCTURE_TYPE_SUBMIT_INFO,        // VkStructureType sType;
1371             nullptr,                              // const void* pNext;
1372             0u,                                   // uint32_t waitSemaphoreCount;
1373             nullptr,                              // const VkSemaphore* pWaitSemaphores;
1374             nullptr,                              // const VkPipelineStageFlags* pWaitDstStageMask;
1375             1u,                                   // uint32_t commandBufferCount;
1376             &preprocessCmdBuffer,                 // const VkCommandBuffer* pCommandBuffers;
1377             de::sizeU32(signalWaitSemaphores),    // uint32_t signalSemaphoreCount;
1378             de::dataOrNull(signalWaitSemaphores), // const VkSemaphore* pSignalSemaphores;
1379         });
1380     }
1381 
1382     DE_ASSERT(signalWaitSemaphores.size() == waitStages.size());
1383 
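    // Execution submission: waits on the preprocess semaphore when one was created above.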
1384     submitInfos.push_back(VkSubmitInfo{
1385         VK_STRUCTURE_TYPE_SUBMIT_INFO,        // VkStructureType sType;
1386         nullptr,                              // const void* pNext;
1387         de::sizeU32(signalWaitSemaphores),    // uint32_t waitSemaphoreCount;
1388         de::dataOrNull(signalWaitSemaphores), // const VkSemaphore* pWaitSemaphores;
1389         de::dataOrNull(waitStages),           // const VkPipelineStageFlags* pWaitDstStageMask;
1390         1u,                                   // uint32_t commandBufferCount;
1391         &cmdBuffer,                           // const VkCommandBuffer* pCommandBuffers;
1392         0u,                                   // uint32_t signalSemaphoreCount;
1393         nullptr,                              // const VkSemaphore* pSignalSemaphores;
1394     });
1395 
1396     const auto fence(createFence(vkd, device));
1397     VK_CHECK(vkd.queueSubmit(queue, de::sizeU32(submitInfos), de::dataOrNull(submitInfos), *fence));
1398     waitForFence(vkd, device, *fence);
1399 }
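// Illustrative usage sketch ("vkd", "device", "queue" and the command buffers are assumed to
// exist in the caller). With a preprocess command buffer, the two submissions are chained
// through the internal semaphore; passing VK_NULL_HANDLE results in a single plain submission:
//
//     submitAndWaitWithPreprocess(vkd, device, queue, *cmdBuffer, *preprocessCmdBuffer);
//     submitAndWaitWithPreprocess(vkd, device, queue, *cmdBuffer, VK_NULL_HANDLE);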
1400 
1401 } // namespace DGC
1402 } // namespace vkt
1403