/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 * Copyright (c) 2016 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Indirect Compute Dispatch tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeIndirectComputeDispatchTests.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkSafetyCriticalUtil.hpp"

#include <string>
#include <map>
#include <vector>
#include <set>

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuVector.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuTestLog.hpp"
#include "tcuRGBA.hpp"
#include "tcuStringTemplate.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deStringUtil.hpp"
#include "deArrayUtil.hpp"

#include "gluShaderUtil.hpp"
#include "tcuCommandLine.hpp"

namespace vkt
{
namespace compute
{
namespace
{
std::vector<std::string> removeCoreExtensions (const std::vector<std::string>& supportedExtensions, const std::vector<const char*>& coreExtensions)
{
    std::vector<std::string> nonCoreExtensions;
    std::set<std::string> excludedExtensions (coreExtensions.begin(), coreExtensions.end());

    for (const auto& supportedExtension : supportedExtensions)
    {
        if (!de::contains(excludedExtensions, supportedExtension))
            nonCoreExtensions.push_back(supportedExtension);
    }

    return nonCoreExtensions;
}

// Creates a device that has a queue for compute capabilities without graphics.
vk::Move<vk::VkDevice> createCustomDevice (Context& context,
#ifdef CTS_USES_VULKANSC
                                           const vkt::CustomInstance& customInstance,
#endif // CTS_USES_VULKANSC
                                           uint32_t& queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface& instanceDriver = customInstance.getDriver();
    const vk::VkPhysicalDevice physicalDevice = chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
    const vk::InstanceInterface& instanceDriver = context.getInstanceInterface();
    const vk::VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

    const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

    queueFamilyIndex = 0;
    for (const auto& queueFamily : queueFamilies)
    {
        if ((queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT) && !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
            break;
        else
            queueFamilyIndex++;
    }

    // One queue family without a graphics bit should be found, since this is checked in checkSupport.
    DE_ASSERT(queueFamilyIndex < queueFamilies.size());

    const float queuePriority = 1.0f;
    const vk::VkDeviceQueueCreateInfo deviceQueueCreateInfos[] = {
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType             sType;
            DE_NULL,                                        // const void*                 pNext;
            (vk::VkDeviceQueueCreateFlags)0u,               // VkDeviceQueueCreateFlags    flags;
            context.getUniversalQueueFamilyIndex(),         // uint32_t                    queueFamilyIndex;
            1u,                                             // uint32_t                    queueCount;
            &queuePriority,                                 // const float*                pQueuePriorities;
        },
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType             sType;
            DE_NULL,                                        // const void*                 pNext;
            (vk::VkDeviceQueueCreateFlags)0u,               // VkDeviceQueueCreateFlags    flags;
            queueFamilyIndex,                               // uint32_t                    queueFamilyIndex;
            1u,                                             // uint32_t                    queueCount;
            &queuePriority,                                 // const float*                pQueuePriorities;
        }
    };

    // context.getDeviceExtensions() returns the supported device extensions, including extensions that
    // have been promoted to Vulkan core. The core extensions must be removed from the list.
    std::vector<const char*> coreExtensions;
    vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
    std::vector<std::string> nonCoreExtensions(removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

    std::vector<const char*> extensionNames;
    extensionNames.reserve(nonCoreExtensions.size());
    for (const std::string& extension : nonCoreExtensions)
        extensionNames.push_back(extension.c_str());

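    // Enable the same features as the context's default device by chaining its
    // VkPhysicalDeviceFeatures2 through pNext; with a features2 struct in the
    // chain, VkDeviceCreateInfo::pEnabledFeatures below must stay NULL.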
    const auto& deviceFeatures2 = context.getDeviceFeatures2();

    const void* pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
    VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ? context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
    memReservationInfo.pNext = pNext;
    pNext = &memReservationInfo;

    VkPipelineCacheCreateInfo pcCI;
    std::vector<VkPipelinePoolSize> poolSizes;
    if (context.getTestContext().getCommandLine().isSubProcess())
    {
        if (context.getResourceInterface()->getCacheDataSize() > 0)
        {
            pcCI =
            {
                VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,       // VkStructureType               sType;
                DE_NULL,                                            // const void*                   pNext;
                VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                    VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags    flags;
                context.getResourceInterface()->getCacheDataSize(), // deUintptr                     initialDataSize;
                context.getResourceInterface()->getCacheData()      // const void*                   pInitialData;
            };
            memReservationInfo.pipelineCacheCreateInfoCount = 1;
            memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
        }
        poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
        if (!poolSizes.empty())
        {
            memReservationInfo.pipelinePoolSizeCount = deUint32(poolSizes.size());
            memReservationInfo.pPipelinePoolSizes = poolSizes.data();
        }
    }
#endif // CTS_USES_VULKANSC

    const vk::VkDeviceCreateInfo deviceCreateInfo =
    {
        vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,     // VkStructureType                    sType;
        pNext,                                        // const void*                        pNext;
        (vk::VkDeviceCreateFlags)0u,                  // VkDeviceCreateFlags                flags;
        DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos),   // uint32_t                           queueCreateInfoCount;
        deviceQueueCreateInfos,                       // const VkDeviceQueueCreateInfo*     pQueueCreateInfos;
        0u,                                           // uint32_t                           enabledLayerCount;
        DE_NULL,                                      // const char* const*                 ppEnabledLayerNames;
        static_cast<uint32_t>(extensionNames.size()), // uint32_t                           enabledExtensionCount;
        extensionNames.data(),                        // const char* const*                 ppEnabledExtensionNames;
        DE_NULL,                                      // const VkPhysicalDeviceFeatures*    pEnabledFeatures;
    };

    return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
                                   customInstance,
#else
                                   context.getInstance(),
#endif
                                   instanceDriver, physicalDevice, &deviceCreateInfo);
}

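// Each dispatch command owns one block in the result storage buffer. Its layout
// matches the std430 "Result" block declared in the verification shader below:
//
//     uvec3 expectedGroupCount;   // bytes 0..11
//     uint  numPassed;            // bytes 12..15  (RESULT_BLOCK_NUM_PASSED_OFFSET)
//
// INDIRECT_COMMAND_OFFSET is the size of one VkDispatchIndirectCommand
// (three uint32 group counts) in the indirect buffer.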
enum
{
    RESULT_BLOCK_BASE_SIZE         = 4 * (int)sizeof(deUint32), // uvec3 + uint
    RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(deUint32),
    INDIRECT_COMMAND_OFFSET        = 3 * (int)sizeof(deUint32),
};

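// Rounds the result block size up to the device's minStorageBufferOffsetAlignment
// so that every per-command block starts at a valid storage buffer offset.
// For example, a base size of 16 on a device with 256-byte alignment yields 256.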
vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface& instance_interface,
                                            const vk::VkPhysicalDevice physicalDevice,
                                            const vk::VkDeviceSize baseSize)
{
    // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
    vk::VkPhysicalDeviceProperties deviceProperties;
    instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;

    if (alignment == 0 || (baseSize % alignment == 0))
        return baseSize;
    else
        return (baseSize / alignment + 1) * alignment;
}

struct DispatchCommand
{
    DispatchCommand (const deIntptr offset,
                     const tcu::UVec3& numWorkGroups)
        : m_offset        (offset)
        , m_numWorkGroups (numWorkGroups) {}

    deIntptr   m_offset;
    tcu::UVec3 m_numWorkGroups;
};

typedef std::vector<DispatchCommand> DispatchCommandsVec;

struct DispatchCaseDesc
{
    DispatchCaseDesc (const char* name,
                      const deUintptr bufferSize,
                      const tcu::UVec3 workGroupSize,
                      const DispatchCommandsVec& dispatchCommands,
                      const bool computeQueueOnly)
        : m_name             (name)
        , m_bufferSize       (bufferSize)
        , m_workGroupSize    (workGroupSize)
        , m_dispatchCommands (dispatchCommands)
        , m_computeOnlyQueue (computeQueueOnly) {}

    const char*               m_name;
    const deUintptr           m_bufferSize;
    const tcu::UVec3          m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const bool                m_computeOnlyQueue;
};

class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
    IndirectDispatchInstanceBufferUpload (Context& context,
                                          const std::string& name,
                                          const deUintptr bufferSize,
                                          const tcu::UVec3& workGroupSize,
                                          const DispatchCommandsVec& dispatchCommands,
                                          const bool computeQueueOnly,
                                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchInstanceBufferUpload (void) {}

    virtual tcu::TestStatus iterate (void);

protected:
    virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
                                         const vk::DeviceInterface& vkdi,
                                         const vk::BufferWithMemory& indirectBuffer);

    deBool verifyResultBuffer (const vk::BufferWithMemory& resultBuffer,
                               const vk::DeviceInterface& vkdi,
                               const vk::VkDeviceSize resultBlockSize) const;

    Context&          m_context;
    const std::string m_name;

    vk::VkDevice m_device;
#ifdef CTS_USES_VULKANSC
    const CustomInstance m_customInstance;
#endif // CTS_USES_VULKANSC
    vk::Move<vk::VkDevice> m_customDevice;
#ifndef CTS_USES_VULKANSC
    de::MovePtr<vk::DeviceDriver> m_deviceDriver;
#else
    de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> m_deviceDriver;
#endif // CTS_USES_VULKANSC

    vk::VkQueue m_queue;
    deUint32    m_queueFamilyIndex;

    const deUintptr           m_bufferSize;
    const tcu::UVec3          m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;

    de::MovePtr<vk::Allocator> m_allocator;

    const bool m_computeQueueOnly;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
    IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
};

IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context& context,
                                                                            const std::string& name,
                                                                            const deUintptr bufferSize,
                                                                            const tcu::UVec3& workGroupSize,
                                                                            const DispatchCommandsVec& dispatchCommands,
                                                                            const bool computeQueueOnly,
                                                                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestInstance                 (context)
    , m_context                         (context)
    , m_name                            (name)
    , m_device                          (context.getDevice())
#ifdef CTS_USES_VULKANSC
    , m_customInstance                  (createCustomInstanceFromContext(context))
#endif // CTS_USES_VULKANSC
    , m_queue                           (context.getUniversalQueue())
    , m_queueFamilyIndex                (context.getUniversalQueueFamilyIndex())
    , m_bufferSize                      (bufferSize)
    , m_workGroupSize                   (workGroupSize)
    , m_dispatchCommands                (dispatchCommands)
    , m_computeQueueOnly                (computeQueueOnly)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

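// Upload variant: writes the dispatch parameters through the host mapping of
// the indirect buffer (the commandBuffer parameter is unused here). Each
// DispatchCommand becomes three consecutive uint32 group counts at its byte
// offset, i.e. the memory image of a VkDispatchIndirectCommand.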
void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
    DE_UNREF(commandBuffer);

    const vk::Allocation& alloc = indirectBuffer.getAllocation();
    deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
    {
        DE_ASSERT(cmdIter->m_offset >= 0);
        DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
        DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);

        deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];

        dstPtr[0] = cmdIter->m_numWorkGroups[0];
        dstPtr[1] = cmdIter->m_numWorkGroups[1];
        dstPtr[2] = cmdIter->m_numWorkGroups[2];
    }

    vk::flushAlloc(vkdi, m_device, alloc);
}

tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface& vki = m_customInstance.getDriver();
#else
    const vk::InstanceInterface& vki = m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
    tcu::TestContext& testCtx = m_context.getTestContext();

    testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
    {
        tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");

        for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            testCtx.getLog()
                << tcu::TestLog::Message
                << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
                << tcu::TestLog::EndMessage;
        }
    }

    if (m_computeQueueOnly)
    {
        // m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
        m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
                                            m_customInstance,
#endif
                                            m_queueFamilyIndex);
        m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
        m_deviceDriver = de::MovePtr<vk::DeviceDriver>(new vk::DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), m_device, m_context.getUsedApiVersion()));
#else
        m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(), m_context.getUsedApiVersion()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
    }
#ifndef CTS_USES_VULKANSC
    const vk::DeviceInterface& vkdi = m_context.getDeviceInterface();
#else
    const vk::DeviceInterface& vkdi = (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
    if (m_computeQueueOnly)
    {
        m_queue = getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
        m_allocator = de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
    }
    vk::Allocator& allocator = m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

    // Create result buffer
    const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
    const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();

    vk::BufferWithMemory resultBuffer(
        vkdi, m_device, allocator,
        vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);

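    // Seed each result block: store the expected group counts for the shader
    // to compare against gl_NumWorkGroups, and zero the numPassed counter.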
    {
        const vk::Allocation& alloc = resultBuffer.getAllocation();
        deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

        for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            deUint8* const dstPtr = &resultDataPtr[resultBlockSize * cmdNdx];

            *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
            *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
            *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
            *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
        }

        vk::flushAlloc(vkdi, m_device, alloc);
    }

    // Create descriptorSetLayout
    vk::DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
    vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, m_device));

    // Create compute pipeline
    vk::ComputePipelineWrapper computePipeline(vkdi, m_device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"));
    computePipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    computePipeline.buildPipeline();

    // Create descriptor pool
    const vk::Unique<vk::VkDescriptorPool> descriptorPool(
        vk::DescriptorPoolBuilder()
        .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
        .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));

    const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);

    // Create command buffer
    const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
    const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Begin recording commands
    beginCommandBuffer(vkdi, *cmdBuffer);

    // Create indirect buffer
    vk::BufferWithMemory indirectBuffer(
        vkdi, m_device, allocator,
        vk::makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);
    fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

    // Bind compute pipeline
    computePipeline.bind(*cmdBuffer);

    // Allocate descriptor sets
    typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
    std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());

    vk::VkDeviceSize curOffset = 0;

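    // Create descriptor sets: one per dispatch command, each pointing at that
    // command's resultBlockSize-sized slice of the result buffer, so every
    // indirect dispatch accumulates numPassed in its own block.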
    for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
    {
        descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
            makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

        const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

        vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
        descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
        descriptorSetBuilder.update(vkdi, m_device);

        // Bind descriptor set
        vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.getPipelineLayout(), 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

        // Dispatch indirect compute command
        vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

        curOffset += resultBlockSize;
    }

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
                            0, (const vk::VkMemoryBarrier*)DE_NULL,
                            1, &ssboPostBarrier,
                            0, (const vk::VkImageMemoryBarrier*)DE_NULL);

    // End recording commands
    endCommandBuffer(vkdi, *cmdBuffer);

    // Wait for command buffer execution to finish
    submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

    // Check if the result buffer contains valid values
    if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
        return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
    else
        return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}

deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const vk::BufferWithMemory& resultBuffer,
                                                                 const vk::DeviceInterface& vkdi,
                                                                 const vk::VkDeviceSize resultBlockSize) const
{
    deBool allOk = true;
    const vk::Allocation& alloc = resultBuffer.getAllocation();
    vk::invalidateAlloc(vkdi, m_device, alloc);

    const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

    for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
    {
        const DispatchCommand& cmd = m_dispatchCommands[cmdNdx];
        const deUint8* const srcPtr = (const deUint8*)resultDataPtr + cmdNdx * resultBlockSize;
        const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
        const deUint32 numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
        const deUint32 numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
        const deUint32 expectedCount = numInvocationsPerGroup * numGroups;

        if (numPassed != expectedCount)
        {
            tcu::TestContext& testCtx = m_context.getTestContext();

            testCtx.getLog()
                << tcu::TestLog::Message
                << "ERROR: got invalid result for invocation " << cmdNdx
                << ": got numPassed = " << numPassed << ", expected " << expectedCount
                << tcu::TestLog::EndMessage;

            allOk = false;
        }
    }

    return allOk;
}

class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
    IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
                                      const DispatchCaseDesc& caseDesc,
                                      const glu::GLSLVersion glslVersion,
                                      const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchCaseBufferUpload (void) {}

    virtual void initPrograms (vk::SourceCollections& programCollection) const;
    virtual TestInstance* createInstance (Context& context) const;
    virtual void checkSupport (Context& context) const;

protected:
    const deUintptr           m_bufferSize;
    const tcu::UVec3          m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const glu::GLSLVersion    m_glslVersion;
    const bool                m_computeOnlyQueue;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
    IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
};

IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
                                                                    const DispatchCaseDesc& caseDesc,
                                                                    const glu::GLSLVersion glslVersion,
                                                                    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestCase                     (testCtx, caseDesc.m_name)
    , m_bufferSize                      (caseDesc.m_bufferSize)
    , m_workGroupSize                   (caseDesc.m_workGroupSize)
    , m_dispatchCommands                (caseDesc.m_dispatchCommands)
    , m_glslVersion                     (glslVersion)
    , m_computeOnlyQueue                (caseDesc.m_computeOnlyQueue)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

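// The verification shader compares gl_NumWorkGroups against the expected group
// counts stored at the start of its result block and atomically counts the
// invocations for which they match; verifyResultBuffer() then expects exactly
// one hit per invocation of the dispatch.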
void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
{
    const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream verifyBuffer;

    verifyBuffer
        << versionDecl << "\n"
        << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Result\n"
        << "{\n"
        << "    uvec3 expectedGroupCount;\n"
        << "    coherent uint numPassed;\n"
        << "} result;\n"
        << "void main (void)\n"
        << "{\n"
        << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
        << "        atomicAdd(result.numPassed, 1u);\n"
        << "}\n";

    std::map<std::string, std::string> args;

    args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
    args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
    args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());

    std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
}

TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
{
    return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue, m_computePipelineConstructionType);
}

void IndirectDispatchCaseBufferUpload::checkSupport (Context& context) const
{
    // Find at least one queue family that supports compute but does NOT support graphics.
    if (m_computeOnlyQueue)
    {
        bool foundQueue = false;
        const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(
            context.getInstanceInterface(), context.getPhysicalDevice());

        for (const auto& queueFamily : queueFamilies)
        {
            if ((queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT) &&
                !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
            {
                foundQueue = true;
                break;
            }
        }
        if (!foundQueue)
            TCU_THROW(NotSupportedError, "No queue family found that supports compute but not graphics.");
    }

    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
    IndirectDispatchInstanceBufferGenerate (Context& context,
                                            const std::string& name,
                                            const deUintptr bufferSize,
                                            const tcu::UVec3& workGroupSize,
                                            const DispatchCommandsVec& dispatchCommands,
                                            const bool computeOnlyQueue,
                                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands, computeOnlyQueue, computePipelineConstructionType) {}

    virtual ~IndirectDispatchInstanceBufferGenerate (void) {}

protected:
    virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
                                         const vk::DeviceInterface& vkdi,
                                         const vk::BufferWithMemory& indirectBuffer);

    vk::Move<vk::VkDescriptorSetLayout> m_descriptorSetLayout;
    vk::Move<vk::VkDescriptorPool>      m_descriptorPool;
    vk::Move<vk::VkDescriptorSet>       m_descriptorSet;
    vk::Move<vk::VkPipelineLayout>      m_pipelineLayout;
    vk::Move<vk::VkPipeline>            m_computePipeline;

private:
    IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
    IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
};

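// Generate variant: instead of writing through a host mapping, records a small
// compute dispatch that writes the indirect parameters on the GPU, then makes
// the writes visible to vkCmdDispatchIndirect with a SHADER_WRITE ->
// INDIRECT_COMMAND_READ buffer barrier (COMPUTE_SHADER -> DRAW_INDIRECT stages).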
void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
    // Create compute shader that generates data for indirect buffer
    const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
        vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

    // Create descriptorSetLayout
    m_descriptorSetLayout = vk::DescriptorSetLayoutBuilder()
        .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vkdi, m_device);

    // Create compute pipeline
    m_pipelineLayout = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
    m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

    // Create descriptor pool
    m_descriptorPool = vk::DescriptorPoolBuilder()
        .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    // Create descriptor set
    m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

    const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

    vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
    descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
    descriptorSetBuilder.update(vkdi, m_device);

    const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

    // Bind compute pipeline
    vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

    // Bind descriptor set
    vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

    // Dispatch compute command
    vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
                            0, (const vk::VkMemoryBarrier*)DE_NULL,
                            1, &bufferBarrier,
                            0, (const vk::VkImageMemoryBarrier*)DE_NULL);
}

class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
{
public:
    IndirectDispatchCaseBufferGenerate (tcu::TestContext& testCtx,
                                        const DispatchCaseDesc& caseDesc,
                                        const glu::GLSLVersion glslVersion,
                                        const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion, computePipelineConstructionType) {}

    virtual ~IndirectDispatchCaseBufferGenerate (void) {}

    virtual void initPrograms (vk::SourceCollections& programCollection) const;
    virtual TestInstance* createInstance (Context& context) const;

private:
    IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
    IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
};

void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
{
    IndirectDispatchCaseBufferUpload::initPrograms(programCollection);

    const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream computeBuffer;

    // Header
    computeBuffer
        << versionDecl << "\n"
        << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Out\n"
        << "{\n"
        << "    highp uint data[];\n"
        << "};\n"
        << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
        << "{\n"
        << "    data[offset+0u] = numWorkGroups.x;\n"
        << "    data[offset+1u] = numWorkGroups.y;\n"
        << "    data[offset+2u] = numWorkGroups.z;\n"
        << "}\n"
        << "void main (void)\n"
        << "{\n";

    // Dispatch commands
    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
    {
        const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
        DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);

        computeBuffer
            << "\twriteCmd(" << offs << "u, uvec3("
            << cmdIter->m_numWorkGroups.x() << "u, "
            << cmdIter->m_numWorkGroups.y() << "u, "
            << cmdIter->m_numWorkGroups.z() << "u));\n";
    }

    // Ending
    computeBuffer << "}\n";

    std::string computeString = computeBuffer.str();

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
}

TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
{
    return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue, m_computePipelineConstructionType);
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd);
    return vec;
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
                                 const DispatchCommand& cmd1,
                                 const DispatchCommand& cmd2,
                                 const DispatchCommand& cmd3,
                                 const DispatchCommand& cmd4)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    return vec;
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
                                 const DispatchCommand& cmd1,
                                 const DispatchCommand& cmd2,
                                 const DispatchCommand& cmd3,
                                 const DispatchCommand& cmd4,
                                 const DispatchCommand& cmd5,
                                 const DispatchCommand& cmd6)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    vec.push_back(cmd5);
    vec.push_back(cmd6);
    return vec;
}

} // anonymous namespace

tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    static const DispatchCaseDesc s_dispatchCases[] =
    {
        // Single invocation only from offset 0
        DispatchCaseDesc("single_invocation", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false
        ),
        // Multiple groups dispatched from offset 0
        DispatchCaseDesc("multiple_groups", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false
        ),
        // Multiple groups of size 2x3x1 from offset 0
        DispatchCaseDesc("multiple_groups_multiple_invocations", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false
        ),
        DispatchCaseDesc("small_offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false
        ),
        DispatchCaseDesc("large_offset", (2 << 20), tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false
        ),
        DispatchCaseDesc("large_offset_multiple_invocations", (2 << 20), tcu::UVec3(2, 3, 1),
            commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false
        ),
        DispatchCaseDesc("empty_command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false
        ),
        // Dispatch multiple compute commands from a single buffer
        DispatchCaseDesc("multi_dispatch", 1 << 10, tcu::UVec3(3, 1, 2),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
                        DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                        DispatchCommand(40, tcu::UVec3(1, 1, 7)),
                        DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
        ),
        // Dispatch multiple compute commands from a single buffer, reusing some offsets
        DispatchCaseDesc("multi_dispatch_reuse_command", 1 << 10, tcu::UVec3(3, 1, 2),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                        DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                        DispatchCommand(52, tcu::UVec3(1, 1, 4)),
                        DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
        ),
    };

    de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch"));

    tcu::TestCaseGroup* const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer");
    indirectComputeDispatchTests->addChild(groupBufferUpload);

    for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc = s_dispatchCases[ndx];
        std::string computeName = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize,
                                                            caseDesc.m_dispatchCommands, true);
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    tcu::TestCaseGroup* const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute");
    indirectComputeDispatchTests->addChild(groupBufferGenerate);

    for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc = s_dispatchCases[ndx];
        std::string computeName = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize,
                                                            caseDesc.m_dispatchCommands, true);
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    return indirectComputeDispatchTests.release();
}

} // namespace compute
} // namespace vkt