• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Indirect Compute Dispatch tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeIndirectComputeDispatchTests.hpp"
26 #include "vktComputeTestsUtil.hpp"
27 #include "vktCustomInstancesDevices.hpp"
28 #include "vkSafetyCriticalUtil.hpp"
29 
30 #include <string>
31 #include <map>
32 #include <vector>
33 
34 #include "vkDefs.hpp"
35 #include "vkRef.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vktTestCase.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "vkPlatform.hpp"
40 #include "vkPrograms.hpp"
41 #include "vkMemUtil.hpp"
42 #include "vkBarrierUtil.hpp"
43 #include "vkBuilderUtil.hpp"
44 #include "vkQueryUtil.hpp"
45 #include "vkDeviceUtil.hpp"
46 #include "vkCmdUtil.hpp"
47 #include "vkObjUtil.hpp"
48 #include "vkBufferWithMemory.hpp"
49 
50 #include "tcuVector.hpp"
51 #include "tcuVectorUtil.hpp"
52 #include "tcuTestLog.hpp"
53 #include "tcuRGBA.hpp"
54 #include "tcuStringTemplate.hpp"
55 
56 #include "deUniquePtr.hpp"
57 #include "deSharedPtr.hpp"
58 #include "deStringUtil.hpp"
59 #include "deArrayUtil.hpp"
60 
61 #include "gluShaderUtil.hpp"
62 #include "tcuCommandLine.hpp"
63 
64 #include <set>
65 
66 namespace vkt
67 {
68 namespace compute
69 {
70 namespace
71 {
// Returns the subset of supportedExtensions that does not appear in coreExtensions
// (i.e. strips out extensions that have been promoted to Vulkan core).
std::vector<std::string> removeCoreExtensions (const std::vector<std::string>& supportedExtensions, const std::vector<const char*>& coreExtensions)
{
	const std::set<std::string>	excludedExtensions	(coreExtensions.begin(), coreExtensions.end());
	std::vector<std::string>	nonCoreExtensions;

	nonCoreExtensions.reserve(supportedExtensions.size());

	for (const std::string& extensionName : supportedExtensions)
	{
		// Keep only extensions that are not in the core set.
		if (excludedExtensions.count(extensionName) == 0)
			nonCoreExtensions.push_back(extensionName);
	}

	return nonCoreExtensions;
}
85 
// Creates a device that has a queue for compute capabilities without graphics.
// On return, queueFamilyIndex holds the index of the compute-only queue family
// that was selected. The device is created with two queues: one from the
// universal queue family and one from the compute-only family.
vk::Move<vk::VkDevice> createCustomDevice (Context& context,
#ifdef CTS_USES_VULKANSC
										  const vkt::CustomInstance& customInstance,
#endif // CTS_USES_VULKANSC
										  uint32_t& queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
	// Vulkan SC uses a custom instance; pick the physical device from it.
	const vk::InstanceInterface&	instanceDriver		= customInstance.getDriver();
	const vk::VkPhysicalDevice		physicalDevice		= chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
	const vk::InstanceInterface&	instanceDriver		= context.getInstanceInterface();
	const vk::VkPhysicalDevice		physicalDevice		= context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

	const std::vector<vk::VkQueueFamilyProperties>	queueFamilies = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

	// Scan for the first queue family that supports compute but not graphics.
	queueFamilyIndex = 0;
	for (const auto &queueFamily: queueFamilies)
	{
		if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT && !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
			break;
		else
			queueFamilyIndex++;
	}

	// One queue family without a graphics bit should be found, since this is checked in checkSupport.
	DE_ASSERT(queueFamilyIndex < queueFamilies.size());

	// Request one queue from the universal family and one from the compute-only
	// family found above.
	const float										queuePriority				= 1.0f;
	const vk::VkDeviceQueueCreateInfo				deviceQueueCreateInfos[]	= {
		{
			vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(vk::VkDeviceQueueCreateFlags)0u,				// VkDeviceQueueCreateFlags		flags;
			context.getUniversalQueueFamilyIndex(),			// uint32_t						queueFamilyIndex;
			1u,												// uint32_t						queueCount;
			&queuePriority,									// const float*					pQueuePriorities;
		},
		{
			vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(vk::VkDeviceQueueCreateFlags)0u,				// VkDeviceQueueCreateFlags		flags;
			queueFamilyIndex,								// uint32_t						queueFamilyIndex;
			1u,												// uint32_t						queueCount;
			&queuePriority,									// const float*					pQueuePriorities;
		}
	};

	// context.getDeviceExtensions() returns supported device extension including extensions that have been promoted to
	// Vulkan core. The core extensions must be removed from the list.
	std::vector<const char*>						coreExtensions;
	vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
	std::vector<std::string> nonCoreExtensions(removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

	// Build the const char* list Vulkan expects; the strings stay owned by
	// nonCoreExtensions, which outlives the vkCreateDevice call below.
	std::vector<const char*>						extensionNames;
	extensionNames.reserve(nonCoreExtensions.size());
	for (const std::string& extension : nonCoreExtensions)
		extensionNames.push_back(extension.c_str());

	// Enable the same features the default context device uses, passed through
	// the pNext chain (pEnabledFeatures stays DE_NULL).
	const auto&										deviceFeatures2				= context.getDeviceFeatures2();

	const void *pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
	// Vulkan SC requires object reservation info; in a sub-process reuse the
	// recorded maximum statistics, otherwise start from a reset structure.
	VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ? context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
	memReservationInfo.pNext = pNext;
	pNext = &memReservationInfo;

	VkPipelineCacheCreateInfo			pcCI;
	std::vector<VkPipelinePoolSize>		poolSizes;
	if (context.getTestContext().getCommandLine().isSubProcess())
	{
		if (context.getResourceInterface()->getCacheDataSize() > 0)
		{
			// Reuse the pipeline cache recorded by the main process.
			pcCI =
			{
				VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,			// VkStructureType				sType;
				DE_NULL,												// const void*					pNext;
				VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
					VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT,	// VkPipelineCacheCreateFlags	flags;
				context.getResourceInterface()->getCacheDataSize(),	// deUintptr					initialDataSize;
				context.getResourceInterface()->getCacheData()		// const void*					pInitialData;
			};
			memReservationInfo.pipelineCacheCreateInfoCount		= 1;
			memReservationInfo.pPipelineCacheCreateInfos		= &pcCI;
		}
		poolSizes							= context.getResourceInterface()->getPipelinePoolSizes();
		if (!poolSizes.empty())
		{
			memReservationInfo.pipelinePoolSizeCount		= deUint32(poolSizes.size());
			memReservationInfo.pPipelinePoolSizes			= poolSizes.data();
		}
	}
#endif // CTS_USES_VULKANSC

	const vk::VkDeviceCreateInfo					deviceCreateInfo			=
	{
		vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,		// VkStructureType					sType;
		pNext,											// const void*						pNext;
		(vk::VkDeviceCreateFlags)0u,					// VkDeviceCreateFlags				flags;
		DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos),		// uint32_t							queueCreateInfoCount;
		deviceQueueCreateInfos,							// const VkDeviceQueueCreateInfo*	pQueueCreateInfos;
		0u,												// uint32_t							enabledLayerCount;
		DE_NULL,										// const char* const*				ppEnabledLayerNames;
		static_cast<uint32_t>(extensionNames.size()),	// uint32_t							enabledExtensionCount;
		extensionNames.data(),							// const char* const*				ppEnabledExtensionNames;
		DE_NULL,										// const VkPhysicalDeviceFeatures*	pEnabledFeatures;
	};

	return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
								   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
								   customInstance,
#else
								   context.getInstance(),
#endif
								   instanceDriver, physicalDevice, &deviceCreateInfo);
}
204 
// Byte layout of one result block in the verification SSBO (std430):
// a uvec3 expectedGroupCount followed by a uint numPassed counter.
enum
{
	RESULT_BLOCK_BASE_SIZE			= 4 * (int)sizeof(deUint32), // uvec3 + uint
	RESULT_BLOCK_NUM_PASSED_OFFSET	= 3 * (int)sizeof(deUint32), // byte offset of numPassed within a block
	INDIRECT_COMMAND_OFFSET			= 3 * (int)sizeof(deUint32), // size of one VkDispatchIndirectCommand (3 x uint32); used as a size despite the name
};
211 
// Rounds baseSize up to a multiple of the device's minStorageBufferOffsetAlignment,
// so that consecutive result blocks can each be bound at a valid storage-buffer
// offset. An alignment of zero means any offset is allowed, so baseSize is
// returned unchanged in that case.
vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&	instance_interface,
											const vk::VkPhysicalDevice		physicalDevice,
											const vk::VkDeviceSize			baseSize)
{
	// Use the vkQueryUtil helper (resolves the old TODO about querying properties by hand).
	const vk::VkPhysicalDeviceProperties	deviceProperties	= getPhysicalDeviceProperties(instance_interface, physicalDevice);
	const vk::VkDeviceSize					alignment			= deviceProperties.limits.minStorageBufferOffsetAlignment;

	if (alignment == 0 || (baseSize % alignment == 0))
		return baseSize;

	// Round up to the next multiple of the alignment.
	return (baseSize / alignment + 1) * alignment;
}
226 
227 struct DispatchCommand
228 {
DispatchCommandvkt::compute::__anonae194a740111::DispatchCommand229 				DispatchCommand (const deIntptr		offset,
230 								 const tcu::UVec3&	numWorkGroups)
231 					: m_offset			(offset)
232 					, m_numWorkGroups	(numWorkGroups) {}
233 
234 	deIntptr	m_offset;
235 	tcu::UVec3	m_numWorkGroups;
236 };
237 
238 typedef std::vector<DispatchCommand> DispatchCommandsVec;
239 
// Static description of one test case: indirect buffer size, the verification
// shader's local workgroup size, the list of dispatches to record, and whether
// the case must run on a compute-only (non-graphics) queue.
struct DispatchCaseDesc
{
								DispatchCaseDesc (const char*					name,
												  const char*					description,
												  const deUintptr				bufferSize,
												  const tcu::UVec3				workGroupSize,
												  const DispatchCommandsVec&	dispatchCommands,
												  const bool					computeQueueOnly)
									: m_name				(name)
									, m_description			(description)
									, m_bufferSize			(bufferSize)
									, m_workGroupSize		(workGroupSize)
									, m_dispatchCommands	(dispatchCommands)
									, m_computeOnlyQueue	(computeQueueOnly) {}

	const char*					m_name;				// Case name; also used to derive shader program names.
	const char*					m_description;
	const deUintptr				m_bufferSize;		// Indirect buffer size in bytes.
	const tcu::UVec3			m_workGroupSize;	// local_size_{x,y,z} of the verification shader.
	const DispatchCommandsVec	m_dispatchCommands;	// Dispatches to record, each with its buffer offset.
	const bool					m_computeOnlyQueue;	// Require a compute-only queue family.
};
262 
// Test instance that uploads VkDispatchIndirectCommand data into the indirect
// buffer from the host, records one vkCmdDispatchIndirect per command, and
// verifies via an atomic counter that each dispatch observed the expected
// gl_NumWorkGroups.
class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
									IndirectDispatchInstanceBufferUpload	(Context&					context,
																			 const std::string&			name,
																			 const deUintptr			bufferSize,
																			 const tcu::UVec3&			workGroupSize,
																			 const DispatchCommandsVec& dispatchCommands,
																			 const bool					computeQueueOnly);

	virtual							~IndirectDispatchInstanceBufferUpload	(void) {}

	virtual tcu::TestStatus			iterate									(void);

protected:
	// Writes the dispatch commands into the indirect buffer; host upload here,
	// overridden by the "generate" variant to fill the buffer on the GPU.
	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer		commandBuffer,
																			 const vk::DeviceInterface&     vkdi,
																			 const vk::BufferWithMemory&	indirectBuffer);

	// Checks every result block's numPassed counter against the expected
	// invocation count; returns false if any block mismatches.
	deBool							verifyResultBuffer						(const vk::BufferWithMemory&	resultBuffer,
																			 const vk::DeviceInterface&     vkdi,
																			 const vk::VkDeviceSize			resultBlockSize) const;

	Context&						m_context;
	const std::string				m_name;

	// Default context device, or m_customDevice once the compute-only path runs.
	vk::VkDevice					m_device;
#ifdef CTS_USES_VULKANSC
	const CustomInstance			m_customInstance;
#endif // CTS_USES_VULKANSC
	vk::Move<vk::VkDevice>			m_customDevice;	// Created only when m_computeQueueOnly is set.
#ifndef CTS_USES_VULKANSC
	de::MovePtr<vk::DeviceDriver>	m_deviceDriver;
#else
	de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>	m_deviceDriver;
#endif // CTS_USES_VULKANSC

	vk::VkQueue						m_queue;
	deUint32						m_queueFamilyIndex;

	const deUintptr					m_bufferSize;		// Indirect buffer size in bytes.
	const tcu::UVec3				m_workGroupSize;	// Verification shader local size.
	const DispatchCommandsVec		m_dispatchCommands;

	de::MovePtr<vk::Allocator>		m_allocator;		// Allocator for the custom device, when one is created.

	const bool						m_computeQueueOnly;
private:
	// Not copyable.
	IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
	IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
};
314 
// Starts out bound to the context's universal device and queue; iterate()
// replaces these with a custom compute-only device when computeQueueOnly is set.
IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&					context,
																			const std::string&			name,
																			const deUintptr				bufferSize,
																			const tcu::UVec3&			workGroupSize,
																			const DispatchCommandsVec&	dispatchCommands,
																			const bool					computeQueueOnly)
	: vkt::TestInstance		(context)
	, m_context				(context)
	, m_name				(name)
	, m_device				(context.getDevice())
#ifdef CTS_USES_VULKANSC
	, m_customInstance		(createCustomInstanceFromContext(context))
#endif // CTS_USES_VULKANSC
	, m_queue				(context.getUniversalQueue())
	, m_queueFamilyIndex	(context.getUniversalQueueFamilyIndex())
	, m_bufferSize			(bufferSize)
	, m_workGroupSize		(workGroupSize)
	, m_dispatchCommands	(dispatchCommands)
	, m_computeQueueOnly	(computeQueueOnly)
{
}
336 
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const vk::DeviceInterface & vkdi,const vk::BufferWithMemory & indirectBuffer)337 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
338 {
339 	DE_UNREF(commandBuffer);
340 
341 	const vk::Allocation& alloc = indirectBuffer.getAllocation();
342 	deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
343 
344 	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
345 	{
346 		DE_ASSERT(cmdIter->m_offset >= 0);
347 		DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
348 		DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
349 
350 		deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
351 
352 		dstPtr[0] = cmdIter->m_numWorkGroups[0];
353 		dstPtr[1] = cmdIter->m_numWorkGroups[1];
354 		dstPtr[2] = cmdIter->m_numWorkGroups[2];
355 	}
356 
357 	vk::flushAlloc(vkdi, m_device, alloc);
358 }
359 
// Runs the test: optionally switches to a compute-only device, prepares the
// result and indirect buffers, records one vkCmdDispatchIndirect per command,
// submits, and verifies the atomic counters written by the verify shader.
tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
{
#ifdef CTS_USES_VULKANSC
	const vk::InstanceInterface&	vki						= m_customInstance.getDriver();
#else
	const vk::InstanceInterface&	vki						= m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
	tcu::TestContext& testCtx = m_context.getTestContext();

	// Log the test configuration (buffer size and all dispatch commands).
	testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
	{
		tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");

		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
		{
			testCtx.getLog()
				<< tcu::TestLog::Message
				<< cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
				<< tcu::TestLog::EndMessage;
		}
	}

	if (m_computeQueueOnly)
	{
		// m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
		m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
											m_customInstance,
#endif
											m_queueFamilyIndex);
		m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
		m_deviceDriver = de::MovePtr<vk::DeviceDriver>(new vk::DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), m_device));
#else
		m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
	}
	// Pick the device interface matching the active device (custom or default).
#ifndef CTS_USES_VULKANSC
	const vk::DeviceInterface& vkdi = m_context.getDeviceInterface();
#else
	const vk::DeviceInterface& vkdi = (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
	if (m_computeQueueOnly)
	{
		// Fetch the compute-only queue and set up an allocator for the custom device.
		m_queue = getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
		m_allocator		= de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
	}
	vk::Allocator&			allocator			= m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

	// Create result buffer; one aligned block per dispatch command.
	const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
	const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();

	vk::BufferWithMemory resultBuffer(
		vkdi, m_device, allocator,
		vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
		vk::MemoryRequirement::HostVisible);

	{
		// Seed each result block with the expected group counts and a zeroed
		// numPassed counter for the verify shader to increment.
		const vk::Allocation& alloc = resultBuffer.getAllocation();
		deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
		{
			deUint8* const	dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];

			*(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
			*(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
			*(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
			*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
		}

		vk::flushAlloc(vkdi, m_device, alloc);
	}

	// Create verify compute shader
	const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
		vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));

	// Create descriptorSetLayout
	vk::DescriptorSetLayoutBuilder layoutBuilder;
	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, m_device));

	// Create compute pipeline
	const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(vkdi, m_device, *descriptorSetLayout));
	const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(vkdi, m_device, *pipelineLayout, *verifyShader));

	// Create descriptor pool; one descriptor set per dispatch command.
	const vk::Unique<vk::VkDescriptorPool> descriptorPool(
		vk::DescriptorPoolBuilder()
		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
		.build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));

	// Barrier making the shader writes visible to host reads after submission.
	const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);

	// Create command buffer
	const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
	const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	// Begin recording commands
	beginCommandBuffer(vkdi, *cmdBuffer);

	// Create indirect buffer; STORAGE usage is needed for the "generate" variant
	// that fills it from a compute shader.
	vk::BufferWithMemory indirectBuffer(
		vkdi, m_device, allocator,
		vk::makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
		vk::MemoryRequirement::HostVisible);
	fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

	// Bind compute pipeline
	vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

	// Allocate descriptor sets
	typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
	std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());

	vk::VkDeviceSize curOffset = 0;

	// Create descriptor sets; each dispatch gets its own result block bound at
	// a distinct offset into the result buffer.
	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
	{
		descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
									makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

		const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

		vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
		descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
		descriptorSetBuilder.update(vkdi, m_device);

		// Bind descriptor set
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

		// Dispatch indirect compute command
		vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

		curOffset += resultBlockSize;
	}

	// Insert memory barrier
	vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
										  0, (const vk::VkMemoryBarrier*)DE_NULL,
										  1, &ssboPostBarrier,
										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);

	// End recording commands
	endCommandBuffer(vkdi, *cmdBuffer);

	// Wait for command buffer execution finish
	submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

	// Check if result buffer contains valid values
	if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
		return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
	else
		return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}
519 
verifyResultBuffer(const vk::BufferWithMemory & resultBuffer,const vk::DeviceInterface & vkdi,const vk::VkDeviceSize resultBlockSize) const520 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const vk::BufferWithMemory&	resultBuffer,
521 																 const vk::DeviceInterface&     vkdi,
522 																 const vk::VkDeviceSize			resultBlockSize) const
523 {
524 	deBool allOk = true;
525 	const vk::Allocation& alloc = resultBuffer.getAllocation();
526 	vk::invalidateAlloc(vkdi, m_device, alloc);
527 
528 	const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
529 
530 	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
531 	{
532 		const DispatchCommand&	cmd = m_dispatchCommands[cmdNdx];
533 		const deUint8* const	srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
534 		const deUint32			numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
535 		const deUint32			numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
536 		const deUint32			numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
537 		const deUint32			expectedCount = numInvocationsPerGroup * numGroups;
538 
539 		if (numPassed != expectedCount)
540 		{
541 			tcu::TestContext& testCtx = m_context.getTestContext();
542 
543 			testCtx.getLog()
544 				<< tcu::TestLog::Message
545 				<< "ERROR: got invalid result for invocation " << cmdNdx
546 				<< ": got numPassed = " << numPassed << ", expected " << expectedCount
547 				<< tcu::TestLog::EndMessage;
548 
549 			allOk = false;
550 		}
551 	}
552 
553 	return allOk;
554 }
555 
// Test case wrapper for the buffer-upload variant: generates the verification
// shader, checks compute-only queue support when required, and instantiates
// IndirectDispatchInstanceBufferUpload.
class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
								IndirectDispatchCaseBufferUpload	(tcu::TestContext&			testCtx,
																	 const DispatchCaseDesc&	caseDesc,
																	 const glu::GLSLVersion		glslVersion);

	virtual						~IndirectDispatchCaseBufferUpload	(void) {}

	virtual void				initPrograms						(vk::SourceCollections&		programCollection) const;
	virtual TestInstance*		createInstance						(Context&					context) const;
	virtual void				checkSupport						(Context& context) const;

protected:
	const deUintptr				m_bufferSize;		// Indirect buffer size in bytes.
	const tcu::UVec3			m_workGroupSize;	// Verification shader local size.
	const DispatchCommandsVec	m_dispatchCommands;	// Dispatches exercised by the case.
	const glu::GLSLVersion		m_glslVersion;		// GLSL version used for the shader sources.
	const bool					m_computeOnlyQueue;	// Require a compute-only queue family.

private:
	// Not copyable.
	IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
	IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
};
580 
// Copies the static case description into the test case object.
IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&		testCtx,
																	const DispatchCaseDesc& caseDesc,
																	const glu::GLSLVersion	glslVersion)
	: vkt::TestCase			(testCtx, caseDesc.m_name, caseDesc.m_description)
	, m_bufferSize			(caseDesc.m_bufferSize)
	, m_workGroupSize		(caseDesc.m_workGroupSize)
	, m_dispatchCommands	(caseDesc.m_dispatchCommands)
	, m_glslVersion			(glslVersion)
	, m_computeOnlyQueue	(caseDesc.m_computeOnlyQueue)
{
}
592 
initPrograms(vk::SourceCollections & programCollection) const593 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
594 {
595 	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
596 
597 	std::ostringstream	verifyBuffer;
598 
599 	verifyBuffer
600 		<< versionDecl << "\n"
601 		<< "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
602 		<< "layout(set = 0, binding = 0, std430) buffer Result\n"
603 		<< "{\n"
604 		<< "    uvec3           expectedGroupCount;\n"
605 		<< "    coherent uint   numPassed;\n"
606 		<< "} result;\n"
607 		<< "void main (void)\n"
608 		<< "{\n"
609 		<< "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
610 		<< "        atomicAdd(result.numPassed, 1u);\n"
611 		<< "}\n";
612 
613 	std::map<std::string, std::string> args;
614 
615 	args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
616 	args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
617 	args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
618 
619 	std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
620 
621 	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
622 }
623 
// Creates the runtime instance for this case, forwarding the case parameters.
TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
{
	return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue);
}
628 
checkSupport(Context & context) const629 void IndirectDispatchCaseBufferUpload::checkSupport (Context& context) const
630 {
631 	// Find at least one queue family that supports compute queue but does NOT support graphics queue.
632 	if (m_computeOnlyQueue)
633 	{
634 		bool foundQueue = false;
635 		const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(
636 				context.getInstanceInterface(), context.getPhysicalDevice());
637 
638 		for (const auto &queueFamily: queueFamilies)
639 		{
640 			if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT &&
641 				!(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
642 			{
643 				foundQueue = true;
644 				break;
645 			}
646 		}
647 		if (!foundQueue)
648 			TCU_THROW(NotSupportedError, "No queue family found that only supports compute queue.");
649 	}
650 }
651 
// Variant that generates the indirect buffer contents on the GPU with a
// dedicated compute shader, instead of writing them from the host.
class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
									IndirectDispatchInstanceBufferGenerate	(Context&					context,
																			 const std::string&			name,
																			 const deUintptr			bufferSize,
																			 const tcu::UVec3&			workGroupSize,
																			 const DispatchCommandsVec&	dispatchCommands,
																			 const bool					computeOnlyQueue)

										: IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands, computeOnlyQueue) {}

	virtual							~IndirectDispatchInstanceBufferGenerate	(void) {}

protected:
	// Records a compute dispatch into commandBuffer that writes the
	// VkDispatchIndirectCommand data, followed by a barrier to the
	// indirect-command-read stage.
	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer		commandBuffer,
																			 const vk::DeviceInterface&     vkdi,
																			 const vk::BufferWithMemory&	indirectBuffer);

	// Kept as members so the objects referenced by the recorded command buffer
	// stay alive until submission completes.
	vk::Move<vk::VkDescriptorSetLayout>	m_descriptorSetLayout;
	vk::Move<vk::VkDescriptorPool>		m_descriptorPool;
	vk::Move<vk::VkDescriptorSet>		m_descriptorSet;
	vk::Move<vk::VkPipelineLayout>		m_pipelineLayout;
	vk::Move<vk::VkPipeline>			m_computePipeline;

private:
	// Not copyable.
	IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
	IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
};
681 
// Records a single-workgroup compute dispatch whose shader writes the
// vkCmdDispatchIndirect() parameters into 'indirectBuffer', followed by a
// barrier that makes those writes visible to the indirect-command reads.
// The generator shader itself is built in
// IndirectDispatchCaseBufferGenerate::initPrograms().
void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
	// Create compute shader that generates data for indirect buffer
	const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
		vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

	// Create descriptorSetLayout: single storage buffer at binding 0 (the indirect buffer)
	m_descriptorSetLayout = vk::DescriptorSetLayoutBuilder()
		.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vkdi, m_device);

	// Create compute pipeline
	m_pipelineLayout = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
	m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

	// Create descriptor pool
	m_descriptorPool = vk::DescriptorPoolBuilder()
		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	// Create descriptor set
	m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

	// Point binding 0 at the whole indirect buffer
	const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

	vk::DescriptorSetUpdateBuilder	descriptorSetBuilder;
	descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
	descriptorSetBuilder.update(vkdi, m_device);

	// Shader writes must become visible to the indirect-command read of the
	// same buffer range later in the command buffer.
	const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

	// Bind compute pipeline
	vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

	// Bind descriptor set
	vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	// Dispatch compute command: the generator shader runs as one 1x1x1 workgroup
	vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

	// Insert memory barrier; DRAW_INDIRECT is the pipeline stage at which
	// indirect dispatch parameters are read.
	vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
										  0, (const vk::VkMemoryBarrier*)DE_NULL,
										  1, &bufferBarrier,
										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
}
729 
// Test case whose indirect buffer contents are generated on the GPU by an
// extra compute shader, rather than uploaded from the host. Reuses the
// upload case's verification programs and adds the generator shader.
class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
{
public:
							IndirectDispatchCaseBufferGenerate	(tcu::TestContext&			testCtx,
																 const DispatchCaseDesc&	caseDesc,
																 const glu::GLSLVersion		glslVersion)
								: IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}

	virtual					~IndirectDispatchCaseBufferGenerate	(void) {}

	// Adds the "indirect_dispatch_<name>_generate" compute source on top of
	// the programs registered by the upload variant.
	virtual void			initPrograms						(vk::SourceCollections&		programCollection) const;
	virtual TestInstance*	createInstance						(Context&					context) const;

private:
	// Not copyable.
	IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
	IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
};
747 
initPrograms(vk::SourceCollections & programCollection) const748 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
749 {
750 	IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
751 
752 	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
753 
754 	std::ostringstream computeBuffer;
755 
756 	// Header
757 	computeBuffer
758 		<< versionDecl << "\n"
759 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
760 		<< "layout(set = 0, binding = 0, std430) buffer Out\n"
761 		<< "{\n"
762 		<< "	highp uint data[];\n"
763 		<< "};\n"
764 		<< "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
765 		<< "{\n"
766 		<< "	data[offset+0u] = numWorkGroups.x;\n"
767 		<< "	data[offset+1u] = numWorkGroups.y;\n"
768 		<< "	data[offset+2u] = numWorkGroups.z;\n"
769 		<< "}\n"
770 		<< "void main (void)\n"
771 		<< "{\n";
772 
773 	// Dispatch commands
774 	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
775 	{
776 		const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
777 		DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
778 
779 		computeBuffer
780 			<< "\twriteCmd(" << offs << "u, uvec3("
781 			<< cmdIter->m_numWorkGroups.x() << "u, "
782 			<< cmdIter->m_numWorkGroups.y() << "u, "
783 			<< cmdIter->m_numWorkGroups.z() << "u));\n";
784 	}
785 
786 	// Ending
787 	computeBuffer << "}\n";
788 
789 	std::string computeString = computeBuffer.str();
790 
791 	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
792 }
793 
// Creates the buffer-generate instance; ownership passes to the framework.
TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
{
	return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue);
}
798 
commandsVec(const DispatchCommand & cmd)799 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
800 {
801 	DispatchCommandsVec vec;
802 	vec.push_back(cmd);
803 	return vec;
804 }
805 
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4)806 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
807 								 const DispatchCommand& cmd1,
808 								 const DispatchCommand& cmd2,
809 								 const DispatchCommand& cmd3,
810 								 const DispatchCommand& cmd4)
811 {
812 	DispatchCommandsVec vec;
813 	vec.push_back(cmd0);
814 	vec.push_back(cmd1);
815 	vec.push_back(cmd2);
816 	vec.push_back(cmd3);
817 	vec.push_back(cmd4);
818 	return vec;
819 }
820 
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4,const DispatchCommand & cmd5,const DispatchCommand & cmd6)821 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
822 								 const DispatchCommand& cmd1,
823 								 const DispatchCommand& cmd2,
824 								 const DispatchCommand& cmd3,
825 								 const DispatchCommand& cmd4,
826 								 const DispatchCommand& cmd5,
827 								 const DispatchCommand& cmd6)
828 {
829 	DispatchCommandsVec vec;
830 	vec.push_back(cmd0);
831 	vec.push_back(cmd1);
832 	vec.push_back(cmd2);
833 	vec.push_back(cmd3);
834 	vec.push_back(cmd4);
835 	vec.push_back(cmd5);
836 	vec.push_back(cmd6);
837 	return vec;
838 }
839 
840 } // anonymous ns
841 
createIndirectComputeDispatchTests(tcu::TestContext & testCtx)842 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
843 {
844 
845 	static const DispatchCaseDesc s_dispatchCases[] =
846 	{
847 		DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
848 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false
849 		),
850 		DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
851 			commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false
852 		),
853 		DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
854 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false
855 		),
856 		DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
857 			commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false
858 		),
859 		DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
860 			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false
861 		),
862 		DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
863 			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false
864 		),
865 		DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
866 			commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false
867 		),
868 		DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
869 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
870 						DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
871 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
872 						DispatchCommand(40, tcu::UVec3(1, 1, 7)),
873 						DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
874 		),
875 		DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
876 			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
877 						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
878 						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
879 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
880 						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
881 						DispatchCommand(52, tcu::UVec3(1, 1, 4)),
882 						DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
883 		),
884 	};
885 
886 	de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
887 
888 	tcu::TestCaseGroup* const	groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
889 	indirectComputeDispatchTests->addChild(groupBufferUpload);
890 
891 	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
892 	{
893 		DispatchCaseDesc desc = s_dispatchCases[ndx];
894 		std::string computeName = std::string(desc.m_name) + std::string("_compute_only_queue");
895 		DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), desc.m_description, desc.m_bufferSize, desc.m_workGroupSize,
896 															desc.m_dispatchCommands, true);
897 		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, desc, glu::GLSL_VERSION_310_ES));
898 		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES));
899 	}
900 
901 	tcu::TestCaseGroup* const	groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
902 	indirectComputeDispatchTests->addChild(groupBufferGenerate);
903 
904 	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
905 	{
906 		DispatchCaseDesc desc = s_dispatchCases[ndx];
907 		std::string computeName = std::string(desc.m_name) + std::string("_compute_only_queue");
908 		DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), desc.m_description, desc.m_bufferSize, desc.m_workGroupSize,
909 															desc.m_dispatchCommands, true);
910 		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, desc, glu::GLSL_VERSION_310_ES));
911 		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES));
912 	}
913 
914 	return indirectComputeDispatchTests.release();
915 }
916 
917 } // compute
918 } // vkt
919