/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 * Copyright (c) 2016 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Indirect Compute Dispatch tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeIndirectComputeDispatchTests.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkSafetyCriticalUtil.hpp"

#include <string>
#include <map>
#include <vector>
#include <set>

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuVector.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuTestLog.hpp"
#include "tcuRGBA.hpp"
#include "tcuStringTemplate.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deStringUtil.hpp"
#include "deArrayUtil.hpp"

#include "gluShaderUtil.hpp"
#include "tcuCommandLine.hpp"

namespace vkt
{
namespace compute
{
namespace
{
std::vector<std::string> removeCoreExtensions (const std::vector<std::string>& supportedExtensions, const std::vector<const char*>& coreExtensions)
{
	std::vector<std::string>	nonCoreExtensions;
	std::set<std::string>		excludedExtensions	(coreExtensions.begin(), coreExtensions.end());

	for (const auto& supportedExtension : supportedExtensions)
	{
		if (!de::contains(excludedExtensions, supportedExtension))
			nonCoreExtensions.push_back(supportedExtension);
	}

	return nonCoreExtensions;
}

// Creates a device with a queue family that supports compute but not graphics.
vk::Move<vk::VkDevice> createCustomDevice (Context& context,
#ifdef CTS_USES_VULKANSC
										  const vkt::CustomInstance& customInstance,
#endif // CTS_USES_VULKANSC
										  uint32_t& queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
	const vk::InstanceInterface&	instanceDriver		= customInstance.getDriver();
	const vk::VkPhysicalDevice		physicalDevice		= chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
	const vk::InstanceInterface&	instanceDriver		= context.getInstanceInterface();
	const vk::VkPhysicalDevice		physicalDevice		= context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

	const std::vector<vk::VkQueueFamilyProperties>	queueFamilies = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

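	// Pick the first queue family that supports compute but not graphics.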
	queueFamilyIndex = 0;
	for (const auto& queueFamily : queueFamilies)
	{
		if ((queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT) && !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
			break;
		else
			queueFamilyIndex++;
	}

	// One queue family without a graphics bit should be found, since this is checked in checkSupport.
	DE_ASSERT(queueFamilyIndex < queueFamilies.size());

	const float										queuePriority				= 1.0f;
	const vk::VkDeviceQueueCreateInfo				deviceQueueCreateInfos[]	= {
		{
			vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(vk::VkDeviceQueueCreateFlags)0u,				// VkDeviceQueueCreateFlags		flags;
			context.getUniversalQueueFamilyIndex(),			// uint32_t						queueFamilyIndex;
			1u,												// uint32_t						queueCount;
			&queuePriority,									// const float*					pQueuePriorities;
		},
		{
			vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(vk::VkDeviceQueueCreateFlags)0u,				// VkDeviceQueueCreateFlags		flags;
			queueFamilyIndex,								// uint32_t						queueFamilyIndex;
			1u,												// uint32_t						queueCount;
			&queuePriority,									// const float*					pQueuePriorities;
		}
	};

	// context.getDeviceExtensions() returns the supported device extensions, including those that have been
	// promoted to Vulkan core. The promoted extensions must be removed from the list before device creation.
	std::vector<const char*>						coreExtensions;
	vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
	std::vector<std::string> nonCoreExtensions(removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

	std::vector<const char*>						extensionNames;
	extensionNames.reserve(nonCoreExtensions.size());
	for (const std::string& extension : nonCoreExtensions)
		extensionNames.push_back(extension.c_str());

	const auto&										deviceFeatures2				= context.getDeviceFeatures2();

	const void* pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
	VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ? context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
	memReservationInfo.pNext = pNext;
	pNext = &memReservationInfo;

	VkPipelineCacheCreateInfo			pcCI;
	std::vector<VkPipelinePoolSize>		poolSizes;
	if (context.getTestContext().getCommandLine().isSubProcess())
	{
		if (context.getResourceInterface()->getCacheDataSize() > 0)
		{
			pcCI =
			{
				VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,			// VkStructureType				sType;
				DE_NULL,												// const void*					pNext;
				VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
					VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT,	// VkPipelineCacheCreateFlags	flags;
				context.getResourceInterface()->getCacheDataSize(),		// deUintptr					initialDataSize;
				context.getResourceInterface()->getCacheData()			// const void*					pInitialData;
			};
			memReservationInfo.pipelineCacheCreateInfoCount		= 1;
			memReservationInfo.pPipelineCacheCreateInfos		= &pcCI;
		}
		poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
		if (!poolSizes.empty())
		{
			memReservationInfo.pipelinePoolSizeCount		= deUint32(poolSizes.size());
			memReservationInfo.pPipelinePoolSizes			= poolSizes.data();
		}
	}
#endif // CTS_USES_VULKANSC

	const vk::VkDeviceCreateInfo					deviceCreateInfo			=
	{
		vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,		// VkStructureType					sType;
		pNext,											// const void*						pNext;
		(vk::VkDeviceCreateFlags)0u,					// VkDeviceCreateFlags				flags;
		DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos),		// uint32_t							queueCreateInfoCount;
		deviceQueueCreateInfos,							// const VkDeviceQueueCreateInfo*	pQueueCreateInfos;
		0u,												// uint32_t							enabledLayerCount;
		DE_NULL,										// const char* const*				ppEnabledLayerNames;
		static_cast<uint32_t>(extensionNames.size()),	// uint32_t							enabledExtensionCount;
		extensionNames.data(),							// const char* const*				ppEnabledExtensionNames;
		DE_NULL,										// const VkPhysicalDeviceFeatures*	pEnabledFeatures;
	};

	return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
								   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
								   customInstance,
#else
								   context.getInstance(),
#endif
								   instanceDriver, physicalDevice, &deviceCreateInfo);
}

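// A result block mirrors the std430 "Result" buffer declared in the verify shader: a uvec3
// expectedGroupCount followed by a uint numPassed. In std430 the uvec3 is 16-byte aligned, so
// numPassed lands in its padding at offset 12 and the whole block is four 32-bit words.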
enum
{
	RESULT_BLOCK_BASE_SIZE			= 4 * (int)sizeof(deUint32), // uvec3 + uint
	RESULT_BLOCK_NUM_PASSED_OFFSET	= 3 * (int)sizeof(deUint32),
	INDIRECT_COMMAND_OFFSET			= 3 * (int)sizeof(deUint32),
};

vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&	instance_interface,
											const vk::VkPhysicalDevice		physicalDevice,
											const vk::VkDeviceSize			baseSize)
{
	// TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
	vk::VkPhysicalDeviceProperties deviceProperties;
	instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
	vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;

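	// Round baseSize up to the next multiple of minStorageBufferOffsetAlignment so each result
	// block can be bound at a valid storage buffer offset; e.g. a 16-byte block with a 64-byte
	// minimum alignment becomes 64 bytes, while exact multiples pass through unchanged.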
	if (alignment == 0 || (baseSize % alignment == 0))
		return baseSize;
	else
		return (baseSize / alignment + 1) * alignment;
}

struct DispatchCommand
{
				DispatchCommand (const deIntptr		offset,
								 const tcu::UVec3&	numWorkGroups)
					: m_offset			(offset)
					, m_numWorkGroups	(numWorkGroups) {}

	deIntptr	m_offset;
	tcu::UVec3	m_numWorkGroups;
};

typedef std::vector<DispatchCommand> DispatchCommandsVec;

struct DispatchCaseDesc
{
								DispatchCaseDesc (const char*					name,
												  const deUintptr				bufferSize,
												  const tcu::UVec3				workGroupSize,
												  const DispatchCommandsVec&	dispatchCommands,
												  const bool					computeQueueOnly)
									: m_name				(name)
									, m_bufferSize			(bufferSize)
									, m_workGroupSize		(workGroupSize)
									, m_dispatchCommands	(dispatchCommands)
									, m_computeOnlyQueue	(computeQueueOnly) {}

	const char*					m_name;
	const deUintptr				m_bufferSize;
	const tcu::UVec3			m_workGroupSize;
	const DispatchCommandsVec	m_dispatchCommands;
	const bool					m_computeOnlyQueue;
};

class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
									IndirectDispatchInstanceBufferUpload	(Context&					context,
																			 const std::string&			name,
																			 const deUintptr			bufferSize,
																			 const tcu::UVec3&			workGroupSize,
																			 const DispatchCommandsVec&	dispatchCommands,
																			 const bool					computeQueueOnly,
																			 const vk::ComputePipelineConstructionType computePipelineConstructionType);

	virtual							~IndirectDispatchInstanceBufferUpload	(void) {}

	virtual tcu::TestStatus			iterate									(void);

protected:
	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer		commandBuffer,
																			 const vk::DeviceInterface&		vkdi,
																			 const vk::BufferWithMemory&	indirectBuffer);

	deBool							verifyResultBuffer						(const vk::BufferWithMemory&	resultBuffer,
																			 const vk::DeviceInterface&		vkdi,
																			 const vk::VkDeviceSize			resultBlockSize) const;

	Context&							m_context;
	const std::string					m_name;

	vk::VkDevice						m_device;
#ifdef CTS_USES_VULKANSC
	const CustomInstance				m_customInstance;
#endif // CTS_USES_VULKANSC
	vk::Move<vk::VkDevice>				m_customDevice;
#ifndef CTS_USES_VULKANSC
	de::MovePtr<vk::DeviceDriver>		m_deviceDriver;
#else
	de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>	m_deviceDriver;
#endif // CTS_USES_VULKANSC

	vk::VkQueue							m_queue;
	deUint32							m_queueFamilyIndex;

	const deUintptr						m_bufferSize;
	const tcu::UVec3					m_workGroupSize;
	const DispatchCommandsVec			m_dispatchCommands;

	de::MovePtr<vk::Allocator>			m_allocator;

	const bool							m_computeQueueOnly;
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;

private:
	IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
	IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
};

IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&									context,
																			const std::string&							name,
																			const deUintptr								bufferSize,
																			const tcu::UVec3&							workGroupSize,
																			const DispatchCommandsVec&					dispatchCommands,
																			const bool									computeQueueOnly,
																			const vk::ComputePipelineConstructionType	computePipelineConstructionType)
	: vkt::TestInstance					(context)
	, m_context							(context)
	, m_name							(name)
	, m_device							(context.getDevice())
#ifdef CTS_USES_VULKANSC
	, m_customInstance					(createCustomInstanceFromContext(context))
#endif // CTS_USES_VULKANSC
	, m_queue							(context.getUniversalQueue())
	, m_queueFamilyIndex				(context.getUniversalQueueFamilyIndex())
	, m_bufferSize						(bufferSize)
	, m_workGroupSize					(workGroupSize)
	, m_dispatchCommands				(dispatchCommands)
	, m_computeQueueOnly				(computeQueueOnly)
	, m_computePipelineConstructionType	(computePipelineConstructionType)
{
}

void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
	DE_UNREF(commandBuffer);

	const vk::Allocation& alloc = indirectBuffer.getAllocation();
	deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

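	// Each command is written as three consecutive uint32 values (x, y and z group counts),
	// matching the VkDispatchIndirectCommand layout that vkCmdDispatchIndirect consumes.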
	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
	{
		DE_ASSERT(cmdIter->m_offset >= 0);
		DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
		DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);

		deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];

		dstPtr[0] = cmdIter->m_numWorkGroups[0];
		dstPtr[1] = cmdIter->m_numWorkGroups[1];
		dstPtr[2] = cmdIter->m_numWorkGroups[2];
	}

	vk::flushAlloc(vkdi, m_device, alloc);
}

tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
{
#ifdef CTS_USES_VULKANSC
	const vk::InstanceInterface&	vki						= m_customInstance.getDriver();
#else
	const vk::InstanceInterface&	vki						= m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
	tcu::TestContext& testCtx = m_context.getTestContext();

	testCtx.getLog() << tcu::TestLog::Message << "Indirect dispatch buffer size = " << m_bufferSize << tcu::TestLog::EndMessage;
	{
		tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");

		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
		{
			testCtx.getLog()
				<< tcu::TestLog::Message
				<< cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
				<< tcu::TestLog::EndMessage;
		}
	}

	if (m_computeQueueOnly)
	{
		// m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
		m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
											m_customInstance,
#endif
											m_queueFamilyIndex);
		m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
		m_deviceDriver = de::MovePtr<vk::DeviceDriver>(new vk::DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), m_device, m_context.getUsedApiVersion()));
#else
		m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(), m_context.getUsedApiVersion()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
	}
#ifndef CTS_USES_VULKANSC
	const vk::DeviceInterface& vkdi = m_context.getDeviceInterface();
#else
	const vk::DeviceInterface& vkdi = (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
	if (m_computeQueueOnly)
	{
		m_queue		= getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
		m_allocator	= de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
	}
	vk::Allocator& allocator = m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

	// Create result buffer
	const vk::VkDeviceSize resultBlockSize	= getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
	const vk::VkDeviceSize resultBufferSize	= resultBlockSize * (deUint32)m_dispatchCommands.size();

	vk::BufferWithMemory resultBuffer(
		vkdi, m_device, allocator,
		vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
		vk::MemoryRequirement::HostVisible);

	{
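		// Pre-fill each result block: the expected group count goes into the first three words and
		// numPassed starts at zero; the verify shader atomically increments it per passing invocation.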
		const vk::Allocation& alloc = resultBuffer.getAllocation();
		deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
		{
			deUint8* const	dstPtr = &resultDataPtr[resultBlockSize * cmdNdx];

			*(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
			*(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
			*(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
			*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
		}

		vk::flushAlloc(vkdi, m_device, alloc);
	}

	// Create descriptorSetLayout
	vk::DescriptorSetLayoutBuilder layoutBuilder;
	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, m_device));

	// Create compute pipeline
	vk::ComputePipelineWrapper computePipeline(vkdi, m_device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"));
	computePipeline.setDescriptorSetLayout(descriptorSetLayout.get());
	computePipeline.buildPipeline();

	// Create descriptor pool
	const vk::Unique<vk::VkDescriptorPool> descriptorPool(
		vk::DescriptorPoolBuilder()
		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
		.build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));

	const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);

	// Create command buffer
	const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
	const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	// Begin recording commands
	beginCommandBuffer(vkdi, *cmdBuffer);

	// Create indirect buffer
	vk::BufferWithMemory indirectBuffer(
		vkdi, m_device, allocator,
		vk::makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
		vk::MemoryRequirement::HostVisible);
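	// The upload variant fills the indirect buffer from the host inside this call; the generate
	// variant overrides it to record a compute dispatch that writes the commands on the GPU.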
	fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

	// Bind compute pipeline
	computePipeline.bind(*cmdBuffer);

	// Allocate descriptor sets
	typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
	std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());

	vk::VkDeviceSize curOffset = 0;

	// Create one descriptor set per dispatch command, each viewing its own block of the result buffer
	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
	{
		descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
									makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

		const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

		vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
		descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
		descriptorSetBuilder.update(vkdi, m_device);

		// Bind descriptor set
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.getPipelineLayout(), 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

		// Dispatch indirect compute command
		vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

		curOffset += resultBlockSize;
	}

	// Make the shader writes to the result buffer visible to host reads
	vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
										  0, (const vk::VkMemoryBarrier*)DE_NULL,
										  1, &ssboPostBarrier,
										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);

	// End recording commands
	endCommandBuffer(vkdi, *cmdBuffer);

	// Wait for command buffer execution to finish
	submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

	// Check if result buffer contains valid values
	if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
		return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
	else
		return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}

deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const vk::BufferWithMemory&	resultBuffer,
																 const vk::DeviceInterface&		vkdi,
																 const vk::VkDeviceSize			resultBlockSize) const
{
	deBool allOk = true;
	const vk::Allocation& alloc = resultBuffer.getAllocation();
	vk::invalidateAlloc(vkdi, m_device, alloc);

	const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
	{
		const DispatchCommand&	cmd = m_dispatchCommands[cmdNdx];
		const deUint8* const	srcPtr = (const deUint8*)resultDataPtr + cmdNdx * resultBlockSize;
		const deUint32			numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
		const deUint32			numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
		const deUint32			numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
		const deUint32			expectedCount = numInvocationsPerGroup * numGroups;

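		// Each passing invocation incremented numPassed exactly once, so a correct dispatch
		// yields exactly numInvocationsPerGroup * numGroups.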
		if (numPassed != expectedCount)
		{
			tcu::TestContext& testCtx = m_context.getTestContext();

			testCtx.getLog()
				<< tcu::TestLog::Message
				<< "ERROR: got invalid result for invocation " << cmdNdx
				<< ": got numPassed = " << numPassed << ", expected " << expectedCount
				<< tcu::TestLog::EndMessage;

			allOk = false;
		}
	}

	return allOk;
}

class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
								IndirectDispatchCaseBufferUpload	(tcu::TestContext&			testCtx,
																	 const DispatchCaseDesc&	caseDesc,
																	 const glu::GLSLVersion		glslVersion,
																	 const vk::ComputePipelineConstructionType computePipelineConstructionType);

	virtual						~IndirectDispatchCaseBufferUpload	(void) {}

	virtual void				initPrograms						(vk::SourceCollections&		programCollection) const;
	virtual TestInstance*		createInstance						(Context&					context) const;
	virtual void				checkSupport						(Context&					context) const;

protected:
	const deUintptr						m_bufferSize;
	const tcu::UVec3					m_workGroupSize;
	const DispatchCommandsVec			m_dispatchCommands;
	const glu::GLSLVersion				m_glslVersion;
	const bool							m_computeOnlyQueue;
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;

private:
	IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
	IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
};

IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&		testCtx,
																	const DispatchCaseDesc&	caseDesc,
																	const glu::GLSLVersion	glslVersion,
																	const vk::ComputePipelineConstructionType computePipelineConstructionType)
	: vkt::TestCase						(testCtx, caseDesc.m_name)
	, m_bufferSize						(caseDesc.m_bufferSize)
	, m_workGroupSize					(caseDesc.m_workGroupSize)
	, m_dispatchCommands				(caseDesc.m_dispatchCommands)
	, m_glslVersion						(glslVersion)
	, m_computeOnlyQueue				(caseDesc.m_computeOnlyQueue)
	, m_computePipelineConstructionType	(computePipelineConstructionType)
{
}

void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
{
	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

	std::ostringstream	verifyBuffer;

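	// The verify shader compares the built-in gl_NumWorkGroups, which reflects the parameters of
	// the indirect dispatch, against the expected group count stored in the result buffer, and
	// counts matching invocations with an atomic add.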
	verifyBuffer
		<< versionDecl << "\n"
		<< "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
		<< "layout(set = 0, binding = 0, std430) buffer Result\n"
		<< "{\n"
		<< "    uvec3           expectedGroupCount;\n"
		<< "    coherent uint   numPassed;\n"
		<< "} result;\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
		<< "        atomicAdd(result.numPassed, 1u);\n"
		<< "}\n";

	std::map<std::string, std::string> args;

	args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
	args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
	args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());

	std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);

	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
}

TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
{
	return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue, m_computePipelineConstructionType);
}

void IndirectDispatchCaseBufferUpload::checkSupport (Context& context) const
{
	// Require at least one queue family that supports compute but not graphics.
	if (m_computeOnlyQueue)
	{
		bool foundQueue = false;
		const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(
				context.getInstanceInterface(), context.getPhysicalDevice());

		for (const auto& queueFamily : queueFamilies)
		{
			if ((queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT) &&
				!(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
			{
				foundQueue = true;
				break;
			}
		}
		if (!foundQueue)
			TCU_THROW(NotSupportedError, "No compute-only queue family found.");
	}

	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
									IndirectDispatchInstanceBufferGenerate	(Context&					context,
																			 const std::string&			name,
																			 const deUintptr			bufferSize,
																			 const tcu::UVec3&			workGroupSize,
																			 const DispatchCommandsVec&	dispatchCommands,
																			 const bool					computeOnlyQueue,
																			 const vk::ComputePipelineConstructionType computePipelineConstructionType)
										: IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands, computeOnlyQueue, computePipelineConstructionType) {}

	virtual							~IndirectDispatchInstanceBufferGenerate	(void) {}

protected:
	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer		commandBuffer,
																			 const vk::DeviceInterface&		vkdi,
																			 const vk::BufferWithMemory&	indirectBuffer);

	vk::Move<vk::VkDescriptorSetLayout>	m_descriptorSetLayout;
	vk::Move<vk::VkDescriptorPool>		m_descriptorPool;
	vk::Move<vk::VkDescriptorSet>		m_descriptorSet;
	vk::Move<vk::VkPipelineLayout>		m_pipelineLayout;
	vk::Move<vk::VkPipeline>			m_computePipeline;

private:
	IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
	IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
};

void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
	// Create compute shader that generates data for indirect buffer
	const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
		vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

	// Create descriptorSetLayout
	m_descriptorSetLayout = vk::DescriptorSetLayoutBuilder()
		.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vkdi, m_device);

	// Create compute pipeline
	m_pipelineLayout = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
	m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

	// Create descriptor pool
	m_descriptorPool = vk::DescriptorPoolBuilder()
		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	// Create descriptor set
	m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

	const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

	vk::DescriptorSetUpdateBuilder	descriptorSetBuilder;
	descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
	descriptorSetBuilder.update(vkdi, m_device);

	const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

	// Bind compute pipeline
	vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

	// Bind descriptor set
	vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	// Dispatch compute command
	vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

	// Make the generated commands available to the indirect dispatch; indirect parameters are read at the DRAW_INDIRECT stage
	vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
										  0, (const vk::VkMemoryBarrier*)DE_NULL,
										  1, &bufferBarrier,
										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
}

class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
{
public:
							IndirectDispatchCaseBufferGenerate	(tcu::TestContext&			testCtx,
																 const DispatchCaseDesc&	caseDesc,
																 const glu::GLSLVersion		glslVersion,
																 const vk::ComputePipelineConstructionType computePipelineConstructionType)
								: IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion, computePipelineConstructionType) {}

	virtual					~IndirectDispatchCaseBufferGenerate	(void) {}

	virtual void			initPrograms						(vk::SourceCollections&		programCollection) const;
	virtual TestInstance*	createInstance						(Context&					context) const;

private:
	IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
	IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
};

void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
{
	IndirectDispatchCaseBufferUpload::initPrograms(programCollection);

	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

	std::ostringstream computeBuffer;

	// Header
	computeBuffer
		<< versionDecl << "\n"
		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout(set = 0, binding = 0, std430) buffer Out\n"
		<< "{\n"
		<< "	highp uint data[];\n"
		<< "};\n"
		<< "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
		<< "{\n"
		<< "	data[offset+0u] = numWorkGroups.x;\n"
		<< "	data[offset+1u] = numWorkGroups.y;\n"
		<< "	data[offset+2u] = numWorkGroups.z;\n"
		<< "}\n"
		<< "void main (void)\n"
		<< "{\n";

	// Dispatch commands
	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
	{
		const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
		DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);

		computeBuffer
			<< "\twriteCmd(" << offs << "u, uvec3("
			<< cmdIter->m_numWorkGroups.x() << "u, "
			<< cmdIter->m_numWorkGroups.y() << "u, "
			<< cmdIter->m_numWorkGroups.z() << "u));\n";
	}

	// Ending
	computeBuffer << "}\n";

	std::string computeString = computeBuffer.str();

	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
}

TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
{
	return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue, m_computePipelineConstructionType);
}

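// Convenience builders for the fixed-size command lists used in the case table below.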
DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
{
	DispatchCommandsVec vec;
	vec.push_back(cmd);
	return vec;
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
								 const DispatchCommand& cmd1,
								 const DispatchCommand& cmd2,
								 const DispatchCommand& cmd3,
								 const DispatchCommand& cmd4)
{
	DispatchCommandsVec vec;
	vec.push_back(cmd0);
	vec.push_back(cmd1);
	vec.push_back(cmd2);
	vec.push_back(cmd3);
	vec.push_back(cmd4);
	return vec;
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
								 const DispatchCommand& cmd1,
								 const DispatchCommand& cmd2,
								 const DispatchCommand& cmd3,
								 const DispatchCommand& cmd4,
								 const DispatchCommand& cmd5,
								 const DispatchCommand& cmd6)
{
	DispatchCommandsVec vec;
	vec.push_back(cmd0);
	vec.push_back(cmd1);
	vec.push_back(cmd2);
	vec.push_back(cmd3);
	vec.push_back(cmd4);
	vec.push_back(cmd5);
	vec.push_back(cmd6);
	return vec;
}

} // anonymous namespace

tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
	static const DispatchCaseDesc s_dispatchCases[] =
	{
		// Single invocation only from offset 0
		DispatchCaseDesc("single_invocation", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false
		),
		// Multiple groups dispatched from offset 0
		DispatchCaseDesc("multiple_groups", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
			commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false
		),
		// Multiple groups of size 2x3x1 from offset 0
		DispatchCaseDesc("multiple_groups_multiple_invocations", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
			commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false
		),
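		// Dispatch command at a small non-zero offset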
		DispatchCaseDesc("small_offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
			commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false
		),
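		// Dispatch command over 1 MiB into the buffer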
		DispatchCaseDesc("large_offset", (2 << 20), tcu::UVec3(1, 1, 1),
			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false
		),
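		// Large offset combined with multiple invocations per group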
		DispatchCaseDesc("large_offset_multiple_invocations", (2 << 20), tcu::UVec3(2, 3, 1),
			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false
		),
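		// Zero work groups in every dimension; the dispatch should execute no invocations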
		DispatchCaseDesc("empty_command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
			commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false
		),
		// Dispatch multiple compute commands from a single buffer
		DispatchCaseDesc("multi_dispatch", 1 << 10, tcu::UVec3(3, 1, 2),
			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
						DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
						DispatchCommand(40, tcu::UVec3(1, 1, 7)),
						DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
		),
		// Dispatch the same commands multiple times from a single buffer
		DispatchCaseDesc("multi_dispatch_reuse_command", 1 << 10, tcu::UVec3(3, 1, 2),
			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
						DispatchCommand(52, tcu::UVec3(1, 1, 4)),
						DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
		),
	};

	de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch"));

	tcu::TestCaseGroup* const	groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer");
	indirectComputeDispatchTests->addChild(groupBufferUpload);

	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
	{
		DispatchCaseDesc caseDesc = s_dispatchCases[ndx];
		std::string computeName = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
		DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize,
															caseDesc.m_dispatchCommands, true);
		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
	}

	tcu::TestCaseGroup* const	groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute");
	indirectComputeDispatchTests->addChild(groupBufferGenerate);

	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
	{
		DispatchCaseDesc caseDesc = s_dispatchCases[ndx];
		std::string computeName = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
		DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize,
															caseDesc.m_dispatchCommands, true);
		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
	}

	return indirectComputeDispatchTests.release();
}

} // compute
} // vkt