• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Indirect Compute Dispatch tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeIndirectComputeDispatchTests.hpp"
26 #include "vktComputeTestsUtil.hpp"
27 #include "vktCustomInstancesDevices.hpp"
28 #include "vkSafetyCriticalUtil.hpp"
29 
30 #include <string>
31 #include <map>
32 #include <vector>
33 
34 #include "vkDefs.hpp"
35 #include "vkRef.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vktTestCase.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "vkPlatform.hpp"
40 #include "vkPrograms.hpp"
41 #include "vkMemUtil.hpp"
42 #include "vkBarrierUtil.hpp"
43 #include "vkBuilderUtil.hpp"
44 #include "vkQueryUtil.hpp"
45 #include "vkDeviceUtil.hpp"
46 #include "vkCmdUtil.hpp"
47 #include "vkObjUtil.hpp"
48 #include "vkBufferWithMemory.hpp"
49 
50 #include "tcuVector.hpp"
51 #include "tcuVectorUtil.hpp"
52 #include "tcuTestLog.hpp"
53 #include "tcuRGBA.hpp"
54 #include "tcuStringTemplate.hpp"
55 
56 #include "deUniquePtr.hpp"
57 #include "deSharedPtr.hpp"
58 #include "deStringUtil.hpp"
59 #include "deArrayUtil.hpp"
60 
61 #include "gluShaderUtil.hpp"
62 #include "tcuCommandLine.hpp"
63 
64 #include <set>
65 
66 namespace vkt
67 {
68 namespace compute
69 {
70 namespace
71 {
// Filters out extensions that have been promoted to Vulkan core: enabling such
// extensions at device-creation time is invalid, so only the remaining
// (non-core) supported extensions are returned.
std::vector<std::string> removeCoreExtensions (const std::vector<std::string>& supportedExtensions, const std::vector<const char*>& coreExtensions)
{
	const std::set<std::string>	coreSet		(coreExtensions.begin(), coreExtensions.end());
	std::vector<std::string>	remaining;

	remaining.reserve(supportedExtensions.size());

	for (const std::string& extName : supportedExtensions)
	{
		if (coreSet.find(extName) == coreSet.end())
			remaining.push_back(extName);
	}

	return remaining;
}
85 
// Creates a device that has a queue for compute capabilities without graphics.
// Two queues are requested: one from the context's universal queue family and
// one from the first family that supports compute but not graphics. The index
// of the compute-only family is returned through queueFamilyIndex.
vk::Move<vk::VkDevice> createCustomDevice (Context& context,
#ifdef CTS_USES_VULKANSC
										  const vkt::CustomInstance& customInstance,
#endif // CTS_USES_VULKANSC
										  uint32_t& queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
	// Vulkan SC uses a custom instance, so the physical device is chosen through it.
	const vk::InstanceInterface&	instanceDriver		= customInstance.getDriver();
	const vk::VkPhysicalDevice		physicalDevice		= chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
	const vk::InstanceInterface&	instanceDriver		= context.getInstanceInterface();
	const vk::VkPhysicalDevice		physicalDevice		= context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

	const std::vector<vk::VkQueueFamilyProperties>	queueFamilies = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

	// Find the first queue family with the compute bit set and the graphics bit clear.
	queueFamilyIndex = 0;
	for (const auto &queueFamily: queueFamilies)
	{
		if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT && !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
			break;
		else
			queueFamilyIndex++;
	}

	// One queue family without a graphics bit should be found, since this is checked in checkSupport.
	DE_ASSERT(queueFamilyIndex < queueFamilies.size());

	const float										queuePriority				= 1.0f;
	const vk::VkDeviceQueueCreateInfo				deviceQueueCreateInfos[]	= {
		{
			vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(vk::VkDeviceQueueCreateFlags)0u,				// VkDeviceQueueCreateFlags		flags;
			context.getUniversalQueueFamilyIndex(),			// uint32_t						queueFamilyIndex;
			1u,												// uint32_t						queueCount;
			&queuePriority,									// const float*					pQueuePriorities;
		},
		{
			vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(vk::VkDeviceQueueCreateFlags)0u,				// VkDeviceQueueCreateFlags		flags;
			queueFamilyIndex,								// uint32_t						queueFamilyIndex;
			1u,												// uint32_t						queueCount;
			&queuePriority,									// const float*					pQueuePriorities;
		}
	};

	// context.getDeviceExtensions() returns supported device extension including extensions that have been promoted to
	// Vulkan core. The core extensions must be removed from the list.
	std::vector<const char*>						coreExtensions;
	vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
	std::vector<std::string> nonCoreExtensions(removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

	// VkDeviceCreateInfo needs const char* names; the strings stay alive in nonCoreExtensions.
	std::vector<const char*>						extensionNames;
	extensionNames.reserve(nonCoreExtensions.size());
	for (const std::string& extension : nonCoreExtensions)
		extensionNames.push_back(extension.c_str());

	const auto&										deviceFeatures2				= context.getDeviceFeatures2();

	const void *pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
	// Vulkan SC requires object reservation info (and, in sub-process runs,
	// pre-recorded pipeline cache data and pool sizes) chained into pNext.
	VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ? context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
	memReservationInfo.pNext = pNext;
	pNext = &memReservationInfo;

	VkPipelineCacheCreateInfo			pcCI;
	std::vector<VkPipelinePoolSize>		poolSizes;
	if (context.getTestContext().getCommandLine().isSubProcess())
	{
		if (context.getResourceInterface()->getCacheDataSize() > 0)
		{
			pcCI =
			{
				VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,			// VkStructureType				sType;
				DE_NULL,												// const void*					pNext;
				VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
					VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT,	// VkPipelineCacheCreateFlags	flags;
				context.getResourceInterface()->getCacheDataSize(),	// deUintptr					initialDataSize;
				context.getResourceInterface()->getCacheData()		// const void*					pInitialData;
			};
			memReservationInfo.pipelineCacheCreateInfoCount		= 1;
			memReservationInfo.pPipelineCacheCreateInfos		= &pcCI;
		}
		poolSizes							= context.getResourceInterface()->getPipelinePoolSizes();
		if (!poolSizes.empty())
		{
			memReservationInfo.pipelinePoolSizeCount		= deUint32(poolSizes.size());
			memReservationInfo.pPipelinePoolSizes			= poolSizes.data();
		}
	}
#endif // CTS_USES_VULKANSC

	const vk::VkDeviceCreateInfo					deviceCreateInfo			=
	{
		vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,		// VkStructureType					sType;
		pNext,											// const void*						pNext;
		(vk::VkDeviceCreateFlags)0u,					// VkDeviceCreateFlags				flags;
		DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos),		// uint32_t							queueCreateInfoCount;
		deviceQueueCreateInfos,							// const VkDeviceQueueCreateInfo*	pQueueCreateInfos;
		0u,												// uint32_t							enabledLayerCount;
		DE_NULL,										// const char* const*				ppEnabledLayerNames;
		static_cast<uint32_t>(extensionNames.size()),	// uint32_t							enabledExtensionCount;
		extensionNames.data(),							// const char* const*				ppEnabledExtensionNames;
		DE_NULL,										// const VkPhysicalDeviceFeatures*	pEnabledFeatures;
	};

	return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
								   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
								   customInstance,
#else
								   context.getInstance(),
#endif
								   instanceDriver, physicalDevice, &deviceCreateInfo);
}
204 
// Byte-layout constants for the per-dispatch result block written by the
// verify shader (see initPrograms): a uvec3 expectedGroupCount followed by a
// uint numPassed counter.
enum
{
	RESULT_BLOCK_BASE_SIZE			= 4 * (int)sizeof(deUint32), // uvec3 + uint
	RESULT_BLOCK_NUM_PASSED_OFFSET	= 3 * (int)sizeof(deUint32), // byte offset of the numPassed counter
	INDIRECT_COMMAND_OFFSET			= 3 * (int)sizeof(deUint32), // size of one indirect command (3 x uint32: x, y, z group counts)
};
211 
// Rounds baseSize up to the device's minStorageBufferOffsetAlignment so that
// consecutive result blocks can be bound at valid storage-buffer offsets.
vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&	instance_interface,
											const vk::VkPhysicalDevice		physicalDevice,
											const vk::VkDeviceSize			baseSize)
{
	// TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
	vk::VkPhysicalDeviceProperties	deviceProperties;
	instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);

	const vk::VkDeviceSize			align	= deviceProperties.limits.minStorageBufferOffsetAlignment;

	// An alignment of zero means no restriction; otherwise round up to the
	// next multiple (a multiple is returned unchanged).
	if (align == 0)
		return baseSize;

	return ((baseSize + align - 1) / align) * align;
}
226 
// One indirect dispatch: where in the indirect buffer its command is stored
// and the work group counts that will be written there.
struct DispatchCommand
{
				DispatchCommand (const deIntptr		offset,
								 const tcu::UVec3&	numWorkGroups)
					: m_offset			(offset)
					, m_numWorkGroups	(numWorkGroups) {}

	deIntptr	m_offset;			// Byte offset into the indirect buffer.
	tcu::UVec3	m_numWorkGroups;	// Group counts written at m_offset (also the expected gl_NumWorkGroups).
};

typedef std::vector<DispatchCommand> DispatchCommandsVec;
239 
// Static description of one test case: the indirect buffer size, the verify
// shader's local size, the dispatch commands to issue, and whether the case
// must run on a compute-only (non-graphics) queue.
struct DispatchCaseDesc
{
								DispatchCaseDesc (const char*					name,
												  const char*					description,
												  const deUintptr				bufferSize,
												  const tcu::UVec3				workGroupSize,
												  const DispatchCommandsVec&	dispatchCommands,
												  const bool					computeQueueOnly)
									: m_name				(name)
									, m_description			(description)
									, m_bufferSize			(bufferSize)
									, m_workGroupSize		(workGroupSize)
									, m_dispatchCommands	(dispatchCommands)
									, m_computeOnlyQueue	(computeQueueOnly) {}

	const char*					m_name;
	const char*					m_description;
	const deUintptr				m_bufferSize;		// Size of the indirect buffer in bytes.
	const tcu::UVec3			m_workGroupSize;	// Local workgroup size of the verify shader.
	const DispatchCommandsVec	m_dispatchCommands;
	const bool					m_computeOnlyQueue;	// Run on a queue family with compute but no graphics.
};
262 
// Test instance that fills the indirect buffer with dispatch commands directly
// from the host (see fillIndirectBufferData), runs the indirect dispatches and
// verifies the per-dispatch result blocks.
class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
									IndirectDispatchInstanceBufferUpload	(Context&					context,
																			 const std::string&			name,
																			 const deUintptr			bufferSize,
																			 const tcu::UVec3&			workGroupSize,
																			 const DispatchCommandsVec& dispatchCommands,
																			 const bool					computeQueueOnly);

	virtual							~IndirectDispatchInstanceBufferUpload	(void) {}

	virtual tcu::TestStatus			iterate									(void);

protected:
	// Writes the dispatch commands into indirectBuffer; this class writes them
	// from the host, subclasses may record device work into commandBuffer instead.
	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer		commandBuffer,
																			 const vk::DeviceInterface&     vkdi,
																			 const vk::BufferWithMemory&	indirectBuffer);

	// Checks each result block's numPassed against the expected invocation count.
	deBool							verifyResultBuffer						(const vk::BufferWithMemory&	resultBuffer,
																			 const vk::DeviceInterface&     vkdi,
																			 const vk::VkDeviceSize			resultBlockSize) const;

	Context&						m_context;
	const std::string				m_name;				// Used to look up "indirect_dispatch_<name>_*" programs.

	vk::VkDevice					m_device;			// Default device, or m_customDevice when m_computeQueueOnly.
	const CustomInstance			m_customInstance;
	vk::Move<vk::VkDevice>			m_customDevice;		// Created in iterate() for compute-only cases.
#ifndef CTS_USES_VULKANSC
	de::MovePtr<vk::DeviceDriver>	m_deviceDriver;
#else
	de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>	m_deviceDriver;
#endif // CTS_USES_VULKANSC

	vk::VkQueue						m_queue;
	deUint32						m_queueFamilyIndex;

	const deUintptr					m_bufferSize;		// Indirect buffer size in bytes.
	const tcu::UVec3				m_workGroupSize;	// Verify shader local size.
	const DispatchCommandsVec		m_dispatchCommands;

	de::MovePtr<vk::Allocator>		m_allocator;		// Allocator for the custom device, if any.

	const bool						m_computeQueueOnly;
private:
	// Not copyable.
	IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
	IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
};
312 
// Initializes the instance with the universal queue/device; iterate() replaces
// them with a custom compute-only device and queue when computeQueueOnly is set.
IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&					context,
																			const std::string&			name,
																			const deUintptr				bufferSize,
																			const tcu::UVec3&			workGroupSize,
																			const DispatchCommandsVec&	dispatchCommands,
																			const bool					computeQueueOnly)
	: vkt::TestInstance		(context)
	, m_context				(context)
	, m_name				(name)
	, m_device				(context.getDevice())
	, m_customInstance		(createCustomInstanceFromContext(context))
	, m_queue				(context.getUniversalQueue())
	, m_queueFamilyIndex	(context.getUniversalQueueFamilyIndex())
	, m_bufferSize			(bufferSize)
	, m_workGroupSize		(workGroupSize)
	, m_dispatchCommands	(dispatchCommands)
	, m_computeQueueOnly	(computeQueueOnly)
{
}
332 
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const vk::DeviceInterface & vkdi,const vk::BufferWithMemory & indirectBuffer)333 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
334 {
335 	DE_UNREF(commandBuffer);
336 
337 	const vk::Allocation& alloc = indirectBuffer.getAllocation();
338 	deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
339 
340 	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
341 	{
342 		DE_ASSERT(cmdIter->m_offset >= 0);
343 		DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
344 		DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
345 
346 		deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
347 
348 		dstPtr[0] = cmdIter->m_numWorkGroups[0];
349 		dstPtr[1] = cmdIter->m_numWorkGroups[1];
350 		dstPtr[2] = cmdIter->m_numWorkGroups[2];
351 	}
352 
353 	vk::flushAlloc(vkdi, m_device, alloc);
354 }
355 
// Runs the test: sets up the (possibly compute-only) device and queue, creates
// the result and indirect buffers, records one vkCmdDispatchIndirect per
// command with its own result-block descriptor set, submits, and verifies.
tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
{
#ifdef CTS_USES_VULKANSC
	const vk::InstanceInterface&	vki						= m_customInstance.getDriver();
#else
	const vk::InstanceInterface&	vki						= m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
	tcu::TestContext& testCtx = m_context.getTestContext();

	// Log the buffer size and the full list of dispatch commands for debugging.
	testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
	{
		tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");

		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
		{
			testCtx.getLog()
				<< tcu::TestLog::Message
				<< cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
				<< tcu::TestLog::EndMessage;
		}
	}

	if (m_computeQueueOnly)
	{
		// m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
		m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
											m_customInstance,
#endif
											m_queueFamilyIndex);
		m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
		m_deviceDriver = de::MovePtr<vk::DeviceDriver>(new vk::DeviceDriver(m_context.getPlatformInterface(), m_customInstance, m_device));
#else
		m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
	}
	// Use the custom device's driver when one was created, otherwise the context's.
#ifndef CTS_USES_VULKANSC
	const vk::DeviceInterface& vkdi = m_context.getDeviceInterface();
#else
	const vk::DeviceInterface& vkdi = (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
	if (m_computeQueueOnly)
	{
		// The compute-only queue and a matching allocator belong to the custom device.
		m_queue = getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
		m_allocator		= de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
	}
	vk::Allocator&			allocator			= m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

	// Create result buffer
	const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
	const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();

	vk::BufferWithMemory resultBuffer(
		vkdi, m_device, allocator,
		vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
		vk::MemoryRequirement::HostVisible);

	{
		// Pre-fill each result block with the expected group count and a zeroed
		// numPassed counter for the verify shader to compare against / increment.
		const vk::Allocation& alloc = resultBuffer.getAllocation();
		deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
		{
			deUint8* const	dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];

			*(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
			*(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
			*(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
			*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
		}

		vk::flushAlloc(vkdi, m_device, alloc);
	}

	// Create verify compute shader
	const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
		vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));

	// Create descriptorSetLayout
	vk::DescriptorSetLayoutBuilder layoutBuilder;
	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, m_device));

	// Create compute pipeline
	const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(vkdi, m_device, *descriptorSetLayout));
	const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(vkdi, m_device, *pipelineLayout, *verifyShader));

	// Create descriptor pool (one storage-buffer descriptor set per dispatch command)
	const vk::Unique<vk::VkDescriptorPool> descriptorPool(
		vk::DescriptorPoolBuilder()
		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
		.build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));

	// Barrier to make shader writes to the result buffer visible to host reads.
	const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);

	// Create command buffer
	const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
	const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	// Begin recording commands
	beginCommandBuffer(vkdi, *cmdBuffer);

	// Create indirect buffer (also usable as storage so a subclass can generate its contents on the GPU)
	vk::BufferWithMemory indirectBuffer(
		vkdi, m_device, allocator,
		vk::makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
		vk::MemoryRequirement::HostVisible);
	fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

	// Bind compute pipeline
	vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

	// Allocate descriptor sets
	typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
	std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());

	vk::VkDeviceSize curOffset = 0;

	// Create descriptor sets; each dispatch sees only its own result block.
	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
	{
		descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
									makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

		const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

		vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
		descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
		descriptorSetBuilder.update(vkdi, m_device);

		// Bind descriptor set
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

		// Dispatch indirect compute command
		vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

		curOffset += resultBlockSize;
	}

	// Insert memory barrier
	vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
										  0, (const vk::VkMemoryBarrier*)DE_NULL,
										  1, &ssboPostBarrier,
										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);

	// End recording commands
	endCommandBuffer(vkdi, *cmdBuffer);

	// Wait for command buffer execution finish
	submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

	// Check if result buffer contains valid values
	if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
		return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
	else
		return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}
515 
verifyResultBuffer(const vk::BufferWithMemory & resultBuffer,const vk::DeviceInterface & vkdi,const vk::VkDeviceSize resultBlockSize) const516 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const vk::BufferWithMemory&	resultBuffer,
517 																 const vk::DeviceInterface&     vkdi,
518 																 const vk::VkDeviceSize			resultBlockSize) const
519 {
520 	deBool allOk = true;
521 	const vk::Allocation& alloc = resultBuffer.getAllocation();
522 	vk::invalidateAlloc(vkdi, m_device, alloc);
523 
524 	const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
525 
526 	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
527 	{
528 		const DispatchCommand&	cmd = m_dispatchCommands[cmdNdx];
529 		const deUint8* const	srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
530 		const deUint32			numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
531 		const deUint32			numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
532 		const deUint32			numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
533 		const deUint32			expectedCount = numInvocationsPerGroup * numGroups;
534 
535 		if (numPassed != expectedCount)
536 		{
537 			tcu::TestContext& testCtx = m_context.getTestContext();
538 
539 			testCtx.getLog()
540 				<< tcu::TestLog::Message
541 				<< "ERROR: got invalid result for invocation " << cmdNdx
542 				<< ": got numPassed = " << numPassed << ", expected " << expectedCount
543 				<< tcu::TestLog::EndMessage;
544 
545 			allOk = false;
546 		}
547 	}
548 
549 	return allOk;
550 }
551 
// Test case wrapper: compiles the verify shader in initPrograms(), checks for
// a compute-only queue family when required, and creates the buffer-upload
// test instance.
class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
								IndirectDispatchCaseBufferUpload	(tcu::TestContext&			testCtx,
																	 const DispatchCaseDesc&	caseDesc,
																	 const glu::GLSLVersion		glslVersion);

	virtual						~IndirectDispatchCaseBufferUpload	(void) {}

	virtual void				initPrograms						(vk::SourceCollections&		programCollection) const;
	virtual TestInstance*		createInstance						(Context&					context) const;
	virtual void				checkSupport						(Context& context) const;

protected:
	const deUintptr				m_bufferSize;		// Indirect buffer size in bytes.
	const tcu::UVec3			m_workGroupSize;	// Verify shader local size.
	const DispatchCommandsVec	m_dispatchCommands;
	const glu::GLSLVersion		m_glslVersion;
	const bool					m_computeOnlyQueue;	// Case must run on a compute-only queue family.

private:
	// Not copyable.
	IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
	IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
};
576 
// Copies the case parameters out of the static case description.
IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&		testCtx,
																	const DispatchCaseDesc& caseDesc,
																	const glu::GLSLVersion	glslVersion)
	: vkt::TestCase			(testCtx, caseDesc.m_name, caseDesc.m_description)
	, m_bufferSize			(caseDesc.m_bufferSize)
	, m_workGroupSize		(caseDesc.m_workGroupSize)
	, m_dispatchCommands	(caseDesc.m_dispatchCommands)
	, m_glslVersion			(glslVersion)
	, m_computeOnlyQueue	(caseDesc.m_computeOnlyQueue)
{
}
588 
initPrograms(vk::SourceCollections & programCollection) const589 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
590 {
591 	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
592 
593 	std::ostringstream	verifyBuffer;
594 
595 	verifyBuffer
596 		<< versionDecl << "\n"
597 		<< "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
598 		<< "layout(set = 0, binding = 0, std430) buffer Result\n"
599 		<< "{\n"
600 		<< "    uvec3           expectedGroupCount;\n"
601 		<< "    coherent uint   numPassed;\n"
602 		<< "} result;\n"
603 		<< "void main (void)\n"
604 		<< "{\n"
605 		<< "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
606 		<< "        atomicAdd(result.numPassed, 1u);\n"
607 		<< "}\n";
608 
609 	std::map<std::string, std::string> args;
610 
611 	args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
612 	args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
613 	args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
614 
615 	std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
616 
617 	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
618 }
619 
// Forwards the case parameters to a new buffer-upload test instance.
TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
{
	return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue);
}
624 
checkSupport(Context & context) const625 void IndirectDispatchCaseBufferUpload::checkSupport (Context& context) const
626 {
627 	// Find at least one queue family that supports compute queue but does NOT support graphics queue.
628 	if (m_computeOnlyQueue)
629 	{
630 		bool foundQueue = false;
631 		const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(
632 				context.getInstanceInterface(), context.getPhysicalDevice());
633 
634 		for (const auto &queueFamily: queueFamilies)
635 		{
636 			if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT &&
637 				!(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
638 			{
639 				foundQueue = true;
640 				break;
641 			}
642 		}
643 		if (!foundQueue)
644 			TCU_THROW(NotSupportedError, "No queue family found that only supports compute queue.");
645 	}
646 }
647 
// Variant of the upload instance that generates the indirect buffer contents
// on the GPU with a compute shader instead of writing them from the host.
class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
									IndirectDispatchInstanceBufferGenerate	(Context&					context,
																			 const std::string&			name,
																			 const deUintptr			bufferSize,
																			 const tcu::UVec3&			workGroupSize,
																			 const DispatchCommandsVec&	dispatchCommands,
																			 const bool					computeOnlyQueue)

										: IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands, computeOnlyQueue) {}

	virtual							~IndirectDispatchInstanceBufferGenerate	(void) {}

protected:
	// Records a compute dispatch into commandBuffer that writes the indirect commands.
	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer		commandBuffer,
																			 const vk::DeviceInterface&     vkdi,
																			 const vk::BufferWithMemory&	indirectBuffer);

	// Objects used by the generating dispatch; members so they outlive command recording.
	vk::Move<vk::VkDescriptorSetLayout>	m_descriptorSetLayout;
	vk::Move<vk::VkDescriptorPool>		m_descriptorPool;
	vk::Move<vk::VkDescriptorSet>		m_descriptorSet;
	vk::Move<vk::VkPipelineLayout>		m_pipelineLayout;
	vk::Move<vk::VkPipeline>			m_computePipeline;

private:
	// Not copyable.
	IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
	IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
};
677 
// Records, into the given command buffer, a compute dispatch that writes the
// indirect commands into indirectBuffer, followed by a barrier that makes the
// shader writes visible to subsequent indirect-command reads. The pipeline and
// descriptor objects are stored as members so they outlive recording.
void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
	// Create compute shader that generates data for indirect buffer
	const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
		vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

	// Create descriptorSetLayout
	m_descriptorSetLayout = vk::DescriptorSetLayoutBuilder()
		.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vkdi, m_device);

	// Create compute pipeline
	m_pipelineLayout = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
	m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

	// Create descriptor pool
	m_descriptorPool = vk::DescriptorPoolBuilder()
		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	// Create descriptor set
	m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

	// Bind the whole indirect buffer as the shader's output storage buffer.
	const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

	vk::DescriptorSetUpdateBuilder	descriptorSetBuilder;
	descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
	descriptorSetBuilder.update(vkdi, m_device);

	// Barrier: compute-shader writes must be visible before indirect command reads.
	const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

	// Bind compute pipeline
	vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

	// Bind descriptor set
	vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	// Dispatch compute command
	vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

	// Insert memory barrier
	vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
										  0, (const vk::VkMemoryBarrier*)DE_NULL,
										  1, &bufferBarrier,
										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
}
725 
726 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
727 {
728 public:
IndirectDispatchCaseBufferGenerate(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)729 							IndirectDispatchCaseBufferGenerate	(tcu::TestContext&			testCtx,
730 																 const DispatchCaseDesc&	caseDesc,
731 																 const glu::GLSLVersion		glslVersion)
732 								: IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
733 
~IndirectDispatchCaseBufferGenerate(void)734 	virtual					~IndirectDispatchCaseBufferGenerate	(void) {}
735 
736 	virtual void			initPrograms						(vk::SourceCollections&		programCollection) const;
737 	virtual TestInstance*	createInstance						(Context&					context) const;
738 
739 private:
740 	IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
741 	IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
742 };
743 
initPrograms(vk::SourceCollections & programCollection) const744 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
745 {
746 	IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
747 
748 	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
749 
750 	std::ostringstream computeBuffer;
751 
752 	// Header
753 	computeBuffer
754 		<< versionDecl << "\n"
755 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
756 		<< "layout(set = 0, binding = 0, std430) buffer Out\n"
757 		<< "{\n"
758 		<< "	highp uint data[];\n"
759 		<< "};\n"
760 		<< "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
761 		<< "{\n"
762 		<< "	data[offset+0u] = numWorkGroups.x;\n"
763 		<< "	data[offset+1u] = numWorkGroups.y;\n"
764 		<< "	data[offset+2u] = numWorkGroups.z;\n"
765 		<< "}\n"
766 		<< "void main (void)\n"
767 		<< "{\n";
768 
769 	// Dispatch commands
770 	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
771 	{
772 		const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
773 		DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
774 
775 		computeBuffer
776 			<< "\twriteCmd(" << offs << "u, uvec3("
777 			<< cmdIter->m_numWorkGroups.x() << "u, "
778 			<< cmdIter->m_numWorkGroups.y() << "u, "
779 			<< cmdIter->m_numWorkGroups.z() << "u));\n";
780 	}
781 
782 	// Ending
783 	computeBuffer << "}\n";
784 
785 	std::string computeString = computeBuffer.str();
786 
787 	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
788 }
789 
createInstance(Context & context) const790 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
791 {
792 	return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue);
793 }
794 
commandsVec(const DispatchCommand & cmd)795 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
796 {
797 	DispatchCommandsVec vec;
798 	vec.push_back(cmd);
799 	return vec;
800 }
801 
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4)802 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
803 								 const DispatchCommand& cmd1,
804 								 const DispatchCommand& cmd2,
805 								 const DispatchCommand& cmd3,
806 								 const DispatchCommand& cmd4)
807 {
808 	DispatchCommandsVec vec;
809 	vec.push_back(cmd0);
810 	vec.push_back(cmd1);
811 	vec.push_back(cmd2);
812 	vec.push_back(cmd3);
813 	vec.push_back(cmd4);
814 	return vec;
815 }
816 
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4,const DispatchCommand & cmd5,const DispatchCommand & cmd6)817 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
818 								 const DispatchCommand& cmd1,
819 								 const DispatchCommand& cmd2,
820 								 const DispatchCommand& cmd3,
821 								 const DispatchCommand& cmd4,
822 								 const DispatchCommand& cmd5,
823 								 const DispatchCommand& cmd6)
824 {
825 	DispatchCommandsVec vec;
826 	vec.push_back(cmd0);
827 	vec.push_back(cmd1);
828 	vec.push_back(cmd2);
829 	vec.push_back(cmd3);
830 	vec.push_back(cmd4);
831 	vec.push_back(cmd5);
832 	vec.push_back(cmd6);
833 	return vec;
834 }
835 
836 } // anonymous ns
837 
createIndirectComputeDispatchTests(tcu::TestContext & testCtx)838 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
839 {
840 
841 	static const DispatchCaseDesc s_dispatchCases[] =
842 	{
843 		DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
844 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false
845 		),
846 		DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
847 			commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false
848 		),
849 		DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
850 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false
851 		),
852 		DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
853 			commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false
854 		),
855 		DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
856 			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false
857 		),
858 		DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
859 			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false
860 		),
861 		DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
862 			commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false
863 		),
864 		DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
865 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
866 						DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
867 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
868 						DispatchCommand(40, tcu::UVec3(1, 1, 7)),
869 						DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
870 		),
871 		DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
872 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
873 						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
874 						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
875 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
876 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
877 						DispatchCommand(52, tcu::UVec3(1, 1, 4)),
878 						DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
879 		),
880 	};
881 
882 	de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
883 
884 	tcu::TestCaseGroup* const	groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
885 	indirectComputeDispatchTests->addChild(groupBufferUpload);
886 
887 	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
888 	{
889 		DispatchCaseDesc desc = s_dispatchCases[ndx];
890 		std::string computeName = std::string(desc.m_name) + std::string("_compute_only_queue");
891 		DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), desc.m_description, desc.m_bufferSize, desc.m_workGroupSize,
892 															desc.m_dispatchCommands, true);
893 		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, desc, glu::GLSL_VERSION_310_ES));
894 		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES));
895 	}
896 
897 	tcu::TestCaseGroup* const	groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
898 	indirectComputeDispatchTests->addChild(groupBufferGenerate);
899 
900 	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
901 	{
902 		DispatchCaseDesc desc = s_dispatchCases[ndx];
903 		std::string computeName = std::string(desc.m_name) + std::string("_compute_only_queue");
904 		DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), desc.m_description, desc.m_bufferSize, desc.m_workGroupSize,
905 															desc.m_dispatchCommands, true);
906 		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, desc, glu::GLSL_VERSION_310_ES));
907 		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES));
908 	}
909 
910 	return indirectComputeDispatchTests.release();
911 }
912 
913 } // compute
914 } // vkt
915