• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Vulkan Performance Query Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26 
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 
37 #include "deMath.h"
38 
39 #include "tcuTestLog.hpp"
40 #include "tcuResource.hpp"
41 #include "tcuImageCompare.hpp"
42 #include "vkImageUtil.hpp"
43 #include "tcuCommandLine.hpp"
44 #include "tcuRGBA.hpp"
45 
46 namespace vkt
47 {
48 namespace QueryPool
49 {
50 namespace
51 {
52 
53 using namespace vk;
54 using namespace Draw;
55 
uuidToHex(const deUint8 uuid[])56 std::string uuidToHex(const deUint8 uuid[])
57 {
58 	const size_t	bytesPerPart[]	= {4, 2, 2, 2, 6};
59 	const deUint8*	ptr				= &uuid[0];
60 	const size_t	stringSize		= VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
61 	std::string		result;
62 
63 	result.reserve(stringSize);
64 
65 	for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
66 	{
67 		const size_t	bytesInPart		= bytesPerPart[partNdx];
68 		const size_t	symbolsInPart	= 2 * bytesInPart;
69 		deUint64		part			= 0;
70 		std::string		partString;
71 
72 		for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
73 		{
74 			part = (part << 8) | *ptr;
75 			++ptr;
76 		}
77 
78 		partString	= tcu::toHex(part).toString();
79 
80 		DE_ASSERT(partString.size() > symbolsInPart);
81 
82 		result += (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
83 
84 		if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
85 			result += '-';
86 	}
87 
88 	DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
89 	DE_ASSERT(result.size() == stringSize);
90 
91 	return result;
92 }
93 
94 class EnumerateAndValidateTest : public TestInstance
95 {
96 public:
97 						EnumerateAndValidateTest		(vkt::Context&	context, VkQueueFlagBits queueFlagBits);
98 	tcu::TestStatus		iterate							(void);
99 
100 protected:
101 	void				basicValidateCounter			(const deUint32 familyIndex);
102 
103 private:
104 	VkQueueFlagBits		m_queueFlagBits;
105 	bool				m_requiredExtensionsPresent;
106 };
107 
EnumerateAndValidateTest(vkt::Context & context,VkQueueFlagBits queueFlagBits)108 EnumerateAndValidateTest::EnumerateAndValidateTest (vkt::Context& context, VkQueueFlagBits queueFlagBits)
109 	: TestInstance(context)
110 	, m_queueFlagBits(queueFlagBits)
111 	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
112 {
113 }
114 
iterate(void)115 tcu::TestStatus EnumerateAndValidateTest::iterate (void)
116 {
117 	const InstanceInterface&					vki				= m_context.getInstanceInterface();
118 	const VkPhysicalDevice						physicalDevice	= m_context.getPhysicalDevice();
119 	const std::vector<VkQueueFamilyProperties>	queueProperties	= getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
120 
121 	for (deUint32 queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
122 	{
123 		if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
124 			continue;
125 
126 		deUint32 counterCount = 0;
127 		VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCount, DE_NULL, DE_NULL));
128 
129 		if (counterCount == 0)
130 			continue;
131 
132 		{
133 			std::vector<VkPerformanceCounterKHR>	counters			(counterCount);
134 			for (deUint32 i = 0; i < counterCount; ++i)
135 				counters[i] = vk::initVulkanStructure();
136 
137 			deUint32								counterCountRead	= counterCount;
138 			std::map<std::string, size_t>			uuidValidator;
139 
140 			if (counterCount > 1)
141 			{
142 				deUint32	incompleteCounterCount	= counterCount - 1;
143 				VkResult	result;
144 
145 				result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
146 				if (result != VK_INCOMPLETE)
147 					TCU_FAIL("VK_INCOMPLETE not returned");
148 			}
149 
150 			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));
151 
152 			if (counterCountRead != counterCount)
153 				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
154 
155 			for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
156 			{
157 				const VkPerformanceCounterKHR&	counter			= counters[counterNdx];
158 				const std::string				uuidStr			= uuidToHex(counter.uuid);
159 
160 				if (uuidValidator.find(uuidStr) != uuidValidator.end())
161 					TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
162 				else
163 					uuidValidator[uuidStr] = counterNdx;
164 
165 				if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
166 					TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));
167 
168 				if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
169 					TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));
170 
171 				if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
172 					TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
173 			}
174 		}
175 		{
176 			std::vector<VkPerformanceCounterDescriptionKHR>	counterDescriptors	(counterCount);
177 			for (deUint32 i = 0; i < counterCount; ++i)
178 				counterDescriptors[i] = vk::initVulkanStructure();
179 			deUint32										counterCountRead	= counterCount;
180 
181 			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));
182 
183 			if (counterCountRead != counterCount)
184 				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
185 
186 			for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
187 			{
188 				const VkPerformanceCounterDescriptionKHR&		counterDescriptor	= counterDescriptors[counterNdx];
189 				const VkPerformanceCounterDescriptionFlagsKHR	allowedFlags		= VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR
190 																					| VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;
191 
192 				if ((counterDescriptor.flags & ~allowedFlags) != 0)
193 					TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
194 			}
195 		}
196 	}
197 
198 	return tcu::TestStatus::pass("Pass");
199 }
200 
201 class QueryTestBase : public TestInstance
202 {
203 public:
204 						QueryTestBase	(vkt::Context&	context);
205 
206 protected:
207 
208 	void				setupCounters			(void);
209 	Move<VkQueryPool>	createQueryPool			(deUint32 enabledCounterOffset, deUint32 enabledCounterStride);
210 	bool				acquireProfilingLock	(void);
211 	void				releaseProfilingLock	(void);
212 	bool				verifyQueryResults		(VkQueryPool queryPool);
213 	deUint32			getRequiredNumerOfPasses(void);
214 
215 private:
216 
217 	bool									m_requiredExtensionsPresent;
218 	deUint32								m_requiredNumerOfPasses;
219 	std::map<deUint64, deUint32>			m_enabledCountersCountMap;		// number of counters that were enabled per query pool
220 	std::vector<VkPerformanceCounterKHR>	m_counters;						// counters provided by the device
221 };
222 
QueryTestBase(vkt::Context & context)223 QueryTestBase::QueryTestBase(vkt::Context& context)
224 	: TestInstance	(context)
225 	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
226 	, m_requiredNumerOfPasses(0)
227 {
228 }
229 
setupCounters()230 void QueryTestBase::setupCounters()
231 {
232 	const InstanceInterface&	vki					= m_context.getInstanceInterface();
233 	const VkPhysicalDevice		physicalDevice		= m_context.getPhysicalDevice();
234 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
235 	deUint32					queueFamilyIndex	= cmdPoolCreateInfo.queueFamilyIndex;
236 	deUint32					counterCount;
237 
238 	if (!m_context.getPerformanceQueryFeatures().performanceCounterQueryPools)
239 		TCU_THROW(NotSupportedError, "Performance counter query pools feature not supported");
240 
241 	// get the number of supported counters
242 	VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, NULL, NULL));
243 
244 	if (!counterCount)
245 		TCU_THROW(NotSupportedError, "QualityWarning: there are no performance counters");
246 
247 	// get supported counters
248 	m_counters.resize(counterCount, vk::initVulkanStructure());
249 	VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, &m_counters[0], DE_NULL));
250 }
251 
createQueryPool(deUint32 enabledCounterOffset,deUint32 enabledCounterStride)252 Move<VkQueryPool> QueryTestBase::createQueryPool(deUint32 enabledCounterOffset, deUint32 enabledCounterStride)
253 {
254 	const InstanceInterface&	vki					= m_context.getInstanceInterface();
255 	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
256 	const VkPhysicalDevice		physicalDevice		= m_context.getPhysicalDevice();
257 	const VkDevice				device				= m_context.getDevice();
258 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
259 	const deUint32				counterCount		= (deUint32)m_counters.size();
260 	deUint32					enabledIndex		= enabledCounterOffset ? 0 : enabledCounterStride;
261 	std::vector<deUint32>		enabledCounters;
262 
263 	// enable every <enabledCounterStride> counter that has command or render pass scope
264 	for (deUint32 i = 0; i < counterCount; i++)
265 	{
266 		// handle offset
267 		if (enabledCounterOffset)
268 		{
269 			if (enabledCounterOffset == enabledIndex)
270 			{
271 				// disable handling offset
272 				enabledCounterOffset = 0;
273 
274 				// eneble next index in stride condition
275 				enabledIndex = enabledCounterStride;
276 			}
277 			else
278 			{
279 				++enabledIndex;
280 				continue;
281 			}
282 		}
283 
284 		// handle stride
285 		if (enabledIndex == enabledCounterStride)
286 		{
287 			enabledCounters.push_back(i);
288 			enabledIndex = 0;
289 		}
290 		else
291 			++enabledIndex;
292 	}
293 
294 	// get number of counters that were enabled for this query pool
295 	deUint32 enabledCountersCount = static_cast<deUint32>(enabledCounters.size());
296 	if (!enabledCountersCount)
297 		TCU_THROW(NotSupportedError, "QualityWarning: no performance counters");
298 
299 	// define performance query
300 	VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo =
301 	{
302 		VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
303 		NULL,
304 		cmdPoolCreateInfo.queueFamilyIndex,			// queue family that this performance query is performed on
305 		enabledCountersCount,						// number of counters to enable
306 		&enabledCounters[0]							// array of indices of counters to enable
307 	};
308 
309 	// get the number of passes counters will require
310 	vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &m_requiredNumerOfPasses);
311 
312 	// create query pool
313 	VkQueryPoolCreateInfo queryPoolCreateInfo =
314 	{
315 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
316 		&performanceQueryCreateInfo,
317 		0,											// flags
318 		VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,		// new query type
319 		1,											// queryCount
320 		0
321 	};
322 
323 	Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);
324 
325 	// memorize number of enabled counters for this query pool
326 	m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;
327 
328 	return queryPool;
329 }
330 
acquireProfilingLock()331 bool QueryTestBase::acquireProfilingLock()
332 {
333 	const DeviceInterface&		vkd		= m_context.getDeviceInterface();
334 	const VkDevice				device	= m_context.getDevice();
335 
336 	// acquire profiling lock before we record command buffers
337 	VkAcquireProfilingLockInfoKHR lockInfo =
338 	{
339 		VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
340 		NULL,
341 		0,
342 		2000000000ull					// wait 2s for the lock
343 	};
344 
345 	VkResult result = vkd.acquireProfilingLockKHR(device, &lockInfo);
346 	if (result == VK_TIMEOUT)
347 	{
348 		m_context.getTestContext().getLog() << tcu::TestLog::Message
349 			<< "Timeout reached, profiling lock wasn't acquired - test had to end earlier"
350 			<< tcu::TestLog::EndMessage;
351 		return false;
352 	}
353 	if (result != VK_SUCCESS)
354 		TCU_FAIL("Profiling lock wasn't acquired");
355 
356 	return true;
357 }
358 
releaseProfilingLock()359 void QueryTestBase::releaseProfilingLock()
360 {
361 	const DeviceInterface&	vkd		= m_context.getDeviceInterface();
362 	const VkDevice			device	= m_context.getDevice();
363 
364 	// release the profiling lock after the command buffer is no longer in the pending state
365 	vkd.releaseProfilingLockKHR(device);
366 }
367 
verifyQueryResults(VkQueryPool queryPool)368 bool QueryTestBase::verifyQueryResults(VkQueryPool queryPool)
369 {
370 	const DeviceInterface&		vkd		= m_context.getDeviceInterface();
371 	const VkDevice				device	= m_context.getDevice();
372 
373 	// create an array to hold the results of all counters
374 	deUint32 enabledCounterCount = m_enabledCountersCountMap[queryPool.getInternal()];
375 	std::vector<VkPerformanceCounterResultKHR> recordedCounters(enabledCounterCount);
376 
377 	// verify that query result can be retrieved
378 	VkResult result = vkd.getQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
379 		&recordedCounters[0], sizeof(VkPerformanceCounterResultKHR), VK_QUERY_RESULT_WAIT_BIT);
380 	if (result == VK_NOT_READY)
381 	{
382 		m_context.getTestContext().getLog() << tcu::TestLog::Message
383 			<< "Pass but result is not ready"
384 			<< tcu::TestLog::EndMessage;
385 		return true;
386 	}
387 	return (result == VK_SUCCESS);
388 }
389 
getRequiredNumerOfPasses()390 deUint32 QueryTestBase::getRequiredNumerOfPasses()
391 {
392 	return m_requiredNumerOfPasses;
393 }
394 
395 // Base class for all graphic tests
396 class GraphicQueryTestBase : public QueryTestBase
397 {
398 public:
399 	GraphicQueryTestBase(vkt::Context&	context);
400 
401 protected:
402 	void initStateObjects(void);
403 
404 protected:
405 	Move<VkPipeline>		m_pipeline;
406 	Move<VkPipelineLayout>	m_pipelineLayout;
407 
408 	de::SharedPtr<Image>	m_colorAttachmentImage;
409 	Move<VkImageView>		m_attachmentView;
410 
411 	Move<VkRenderPass>		m_renderPass;
412 	Move<VkFramebuffer>		m_framebuffer;
413 
414 	de::SharedPtr<Buffer>	m_vertexBuffer;
415 
416 	VkFormat				m_colorAttachmentFormat;
417 	deUint32				m_size;
418 };
419 
GraphicQueryTestBase(vkt::Context & context)420 GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context& context)
421 	: QueryTestBase(context)
422 	, m_colorAttachmentFormat(VK_FORMAT_R8G8B8A8_UNORM)
423 	, m_size(32)
424 {
425 }
426 
initStateObjects(void)427 void GraphicQueryTestBase::initStateObjects(void)
428 {
429 	const VkDevice				device	= m_context.getDevice();
430 	const DeviceInterface&		vkd		= m_context.getDeviceInterface();
431 
432 	//attachment images and views
433 	{
434 		VkExtent3D imageExtent =
435 		{
436 			m_size,		// width
437 			m_size,		// height
438 			1			// depth
439 		};
440 
441 		const ImageCreateInfo colorImageCreateInfo(VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1,
442 												   VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL,
443 												   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
444 
445 		m_colorAttachmentImage = Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
446 													   m_context.getUniversalQueueFamilyIndex());
447 
448 		const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D, m_colorAttachmentFormat);
449 		m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
450 	}
451 
452 	// renderpass and framebuffer
453 	{
454 		RenderPassCreateInfo renderPassCreateInfo;
455 		renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat,				// format
456 																 VK_SAMPLE_COUNT_1_BIT,					// samples
457 																 VK_ATTACHMENT_LOAD_OP_CLEAR,			// loadOp
458 																 VK_ATTACHMENT_STORE_OP_DONT_CARE,		// storeOp
459 																 VK_ATTACHMENT_LOAD_OP_DONT_CARE,		// stencilLoadOp
460 																 VK_ATTACHMENT_STORE_OP_DONT_CARE,		// stencilLoadOp
461 																 VK_IMAGE_LAYOUT_GENERAL,				// initialLauout
462 																 VK_IMAGE_LAYOUT_GENERAL));				// finalLayout
463 
464 		const VkAttachmentReference colorAttachmentReference =
465 		{
466 			0,																							// attachment
467 			VK_IMAGE_LAYOUT_GENERAL																		// layout
468 		};
469 
470 		renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS,				// pipelineBindPoint
471 														   0,											// flags
472 														   0,											// inputCount
473 														   DE_NULL,										// pInputAttachments
474 														   1,											// colorCount
475 														   &colorAttachmentReference,					// pColorAttachments
476 														   DE_NULL,										// pResolveAttachments
477 														   AttachmentReference(),						// depthStencilAttachment
478 														   0,											// preserveCount
479 														   DE_NULL));									// preserveAttachments
480 
481 		m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);
482 
483 		std::vector<VkImageView> attachments(1);
484 		attachments[0] = *m_attachmentView;
485 
486 		FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
487 		m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
488 	}
489 
490 	// pipeline
491 	{
492 		Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
493 		Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));
494 
495 		const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;
496 
497 		const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
498 		m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
499 
500 		const VkVertexInputBindingDescription vf_binding_desc =
501 		{
502 			0,																// binding
503 			4 * (deUint32)sizeof(float),									// stride
504 			VK_VERTEX_INPUT_RATE_VERTEX										// inputRate
505 		};
506 
507 		const VkVertexInputAttributeDescription vf_attribute_desc =
508 		{
509 			0,																// location
510 			0,																// binding
511 			VK_FORMAT_R32G32B32A32_SFLOAT,									// format
512 			0																// offset
513 		};
514 
515 		const VkPipelineVertexInputStateCreateInfo vf_info =
516 		{
517 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// sType
518 			NULL,															// pNext
519 			0u,																// flags
520 			1,																// vertexBindingDescriptionCount
521 			&vf_binding_desc,												// pVertexBindingDescriptions
522 			1,																// vertexAttributeDescriptionCount
523 			&vf_attribute_desc												// pVertexAttributeDescriptions
524 		};
525 
526 		PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
527 		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
528 		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
529 		pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
530 		pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
531 		const VkViewport viewport	= makeViewport(m_size, m_size);
532 		const VkRect2D scissor		= makeRect2D(m_size, m_size);
533 		pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport), std::vector<VkRect2D>(1, scissor)));
534 		pipelineCreateInfo.addState(PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
535 		pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
536 		pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
537 		pipelineCreateInfo.addState(vf_info);
538 		m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
539 	}
540 
541 	// vertex buffer
542 	{
543 		std::vector<tcu::Vec4> vertices(3);
544 		vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
545 		vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
546 		vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);
547 
548 		const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
549 		m_vertexBuffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
550 
551 		tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4*>(m_vertexBuffer->getBoundMemory().getHostPtr());
552 		deMemcpy(ptr, &vertices[0], kBufferSize);
553 
554 		flushAlloc(vkd, device, m_vertexBuffer->getBoundMemory());
555 	}
556 }
557 
558 
559 class GraphicQueryTest : public GraphicQueryTestBase
560 {
561 public:
562 						GraphicQueryTest	(vkt::Context&	context);
563 	tcu::TestStatus		iterate				(void);
564 };
565 
GraphicQueryTest(vkt::Context & context)566 GraphicQueryTest::GraphicQueryTest(vkt::Context& context)
567 	: GraphicQueryTestBase(context)
568 {
569 }
570 
iterate(void)571 tcu::TestStatus GraphicQueryTest::iterate(void)
572 {
573 	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
574 	const VkDevice				device				= m_context.getDevice();
575 	const VkQueue				queue				= m_context.getUniversalQueue();
576 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
577 	Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
578 	Unique<VkCommandBuffer>		cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
579 
580 	initStateObjects();
581 	setupCounters();
582 
583 	vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
584 
585 	if (!acquireProfilingLock())
586 	{
587 		// lock was not acquired in given time, we can't fail the test
588 		return tcu::TestStatus::pass("Pass");
589 	}
590 
591 	// reset query pool
592 	{
593 		Unique<VkCommandBuffer>		resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
594 		const Unique<VkFence>		fence			(createFence(vkd, device));
595 		const VkSubmitInfo			submitInfo		=
596 		{
597 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
598 			DE_NULL,											// pNext
599 			0u,													// waitSemaphoreCount
600 			DE_NULL,											// pWaitSemaphores
601 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
602 			1u,													// commandBufferCount
603 			&resetCmdBuffer.get(),								// pCommandBuffers
604 			0u,													// signalSemaphoreCount
605 			DE_NULL,											// pSignalSemaphores
606 		};
607 
608 		beginCommandBuffer(vkd, *resetCmdBuffer);
609 		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
610 		endCommandBuffer(vkd, *resetCmdBuffer);
611 
612 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
613 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
614 	}
615 
616 	// begin command buffer
617 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
618 
619 	initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
620 								  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
621 
622 	// begin render pass
623 	VkClearValue renderPassClearValue;
624 	deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
625 
626 	// perform query during triangle draw
627 	vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, VK_QUERY_CONTROL_PRECISE_BIT);
628 
629 	beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
630 					makeRect2D(0, 0, m_size, m_size),
631 					1, &renderPassClearValue);
632 
633 	// bind pipeline
634 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
635 
636 	// bind vertex buffer
637 	VkBuffer vertexBuffer = m_vertexBuffer->object();
638 	const VkDeviceSize vertexBufferOffset = 0;
639 	vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
640 
641 	vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
642 
643 	endRenderPass(vkd, *cmdBuffer);
644 
645 	vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);
646 
647 	transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
648 					  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
649 					  VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
650 
651 	endCommandBuffer(vkd, *cmdBuffer);
652 
653 	// submit command buffer for each pass and wait for its completion
654 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
655 	{
656 		const Unique<VkFence> fence(createFence(vkd, device));
657 
658 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
659 		{
660 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
661 			NULL,
662 			passIndex
663 		};
664 
665 		const VkSubmitInfo submitInfo =
666 		{
667 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
668 			&performanceQuerySubmitInfo,						// pNext
669 			0u,													// waitSemaphoreCount
670 			DE_NULL,											// pWaitSemaphores
671 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
672 			1u,													// commandBufferCount
673 			&cmdBuffer.get(),									// pCommandBuffers
674 			0u,													// signalSemaphoreCount
675 			DE_NULL,											// pSignalSemaphores
676 		};
677 
678 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
679 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
680 	}
681 
682 	releaseProfilingLock();
683 
684 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
685 
686 	if (verifyQueryResults(*queryPool))
687 		return tcu::TestStatus::pass("Pass");
688 	return tcu::TestStatus::fail("Fail");
689 }
690 
691 class GraphicMultiplePoolsTest : public GraphicQueryTestBase
692 {
693 public:
694 						GraphicMultiplePoolsTest	(vkt::Context&	context);
695 	tcu::TestStatus		iterate						(void);
696 };
697 
GraphicMultiplePoolsTest(vkt::Context & context)698 GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context& context)
699 	: GraphicQueryTestBase(context)
700 {
701 }
702 
iterate(void)703 tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
704 {
705 	if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
706 		throw tcu::NotSupportedError("MultipleQueryPools not supported");
707 
708 	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
709 	const VkDevice				device				= m_context.getDevice();
710 	const VkQueue				queue				= m_context.getUniversalQueue();
711 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
712 	Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
713 	Unique<VkCommandBuffer>		cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
714 
715 	initStateObjects();
716 	setupCounters();
717 
718 	vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)),
719 							queryPool2(createQueryPool(1, 2));
720 
721 	if (!acquireProfilingLock())
722 	{
723 		// lock was not acquired in given time, we can't fail the test
724 		return tcu::TestStatus::pass("Pass");
725 	}
726 
727 	// reset query pools
728 	{
729 		Unique<VkCommandBuffer>		resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
730 		const Unique<VkFence>		fence			(createFence(vkd, device));
731 		const VkSubmitInfo			submitInfo		=
732 		{
733 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
734 			DE_NULL,											// pNext
735 			0u,													// waitSemaphoreCount
736 			DE_NULL,											// pWaitSemaphores
737 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
738 			1u,													// commandBufferCount
739 			&resetCmdBuffer.get(),								// pCommandBuffers
740 			0u,													// signalSemaphoreCount
741 			DE_NULL,											// pSignalSemaphores
742 		};
743 
744 		beginCommandBuffer(vkd, *resetCmdBuffer);
745 		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
746 		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
747 		endCommandBuffer(vkd, *resetCmdBuffer);
748 
749 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
750 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
751 	}
752 
753 	// begin command buffer
754 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
755 
756 	initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
757 								  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
758 
759 	// begin render pass
760 	VkClearValue renderPassClearValue;
761 	deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
762 
763 	VkBuffer			vertexBuffer		= m_vertexBuffer->object();
764 	const VkDeviceSize	vertexBufferOffset	= 0;
765 	const VkQueryPool	queryPools[]		=
766 	{
767 		*queryPool1,
768 		*queryPool2
769 	};
770 
771 	// perform two queries during triangle draw
772 	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
773 	{
774 		const VkQueryPool queryPool = queryPools[loop];
775 		vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
776 		beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
777 						makeRect2D(0, 0, m_size, m_size),
778 						1, &renderPassClearValue);
779 
780 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
781 		vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
782 		vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
783 
784 		endRenderPass(vkd, *cmdBuffer);
785 		vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
786 	}
787 
788 	transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
789 					  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
790 					  VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
791 
792 	endCommandBuffer(vkd, *cmdBuffer);
793 
794 	// submit command buffer for each pass and wait for its completion
795 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
796 	{
797 		const Unique<VkFence> fence(createFence(vkd, device));
798 
799 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
800 		{
801 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
802 			NULL,
803 			passIndex
804 		};
805 
806 		const VkSubmitInfo submitInfo =
807 		{
808 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
809 			&performanceQuerySubmitInfo,						// pNext
810 			0u,													// waitSemaphoreCount
811 			DE_NULL,											// pWaitSemaphores
812 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
813 			1u,													// commandBufferCount
814 			&cmdBuffer.get(),									// pCommandBuffers
815 			0u,													// signalSemaphoreCount
816 			DE_NULL,											// pSignalSemaphores
817 		};
818 
819 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
820 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
821 	}
822 
823 	releaseProfilingLock();
824 
825 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
826 
827 	if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
828 		return tcu::TestStatus::pass("Pass");
829 	return tcu::TestStatus::fail("Fail");
830 }
831 
832 // Base class for all compute tests
833 class ComputeQueryTestBase : public QueryTestBase
834 {
835 public:
836 	ComputeQueryTestBase(vkt::Context&	context);
837 
838 protected:
839 	void initStateObjects(void);
840 
841 protected:
842 	Move<VkPipeline>		m_pipeline;
843 	Move<VkPipelineLayout>	m_pipelineLayout;
844 	de::SharedPtr<Buffer>	m_buffer;
845 	Move<VkDescriptorPool>	m_descriptorPool;
846 	Move<VkDescriptorSet>	m_descriptorSet;
847 	VkDescriptorBufferInfo	m_descriptorBufferInfo;
848 	VkBufferMemoryBarrier	m_computeFinishBarrier;
849 };
850 
ComputeQueryTestBase(vkt::Context & context)851 ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context& context)
852 	: QueryTestBase(context)
853 {
854 }
855 
initStateObjects(void)856 void ComputeQueryTestBase::initStateObjects(void)
857 {
858 	const DeviceInterface&			vkd = m_context.getDeviceInterface();
859 	const VkDevice					device = m_context.getDevice();
860 	const VkDeviceSize				bufferSize = 32 * sizeof(deUint32);
861 	const CmdPoolCreateInfo			cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
862 	const Unique<VkCommandPool>		cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
863 	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
864 
865 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(DescriptorSetLayoutBuilder()
866 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
867 		.build(vkd, device));
868 
869 	// create pipeline layout
870 	{
871 		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
872 		{
873 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
874 			DE_NULL,													// pNext
875 			0u,															// flags
876 			1u,															// setLayoutCount
877 			&(*descriptorSetLayout),									// pSetLayouts
878 			0u,															// pushConstantRangeCount
879 			DE_NULL,													// pPushConstantRanges
880 		};
881 		m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
882 	}
883 
884 	// create compute pipeline
885 	{
886 		const Unique<VkShaderModule> cs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
887 		const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
888 		{
889 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// sType
890 			DE_NULL,													// pNext
891 			(VkPipelineShaderStageCreateFlags)0u,						// flags
892 			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
893 			*cs,														// module
894 			"main",														// pName
895 			DE_NULL,													// pSpecializationInfo
896 		};
897 		const VkComputePipelineCreateInfo pipelineCreateInfo =
898 		{
899 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,				// sType
900 			DE_NULL,													// pNext
901 			(VkPipelineCreateFlags)0u,									// flags
902 			pipelineShaderStageParams,									// stage
903 			*m_pipelineLayout,											// layout
904 			DE_NULL,													// basePipelineHandle
905 			0,															// basePipelineIndex
906 		};
907 		m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
908 	}
909 
910 	m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
911 		m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
912 	m_descriptorPool = DescriptorPoolBuilder()
913 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
914 		.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
915 	const VkDescriptorSetAllocateInfo allocateParams =
916 	{
917 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,		// sType
918 		DE_NULL,											// pNext
919 		*m_descriptorPool,									// descriptorPool
920 		1u,													// setLayoutCount
921 		&(*descriptorSetLayout),							// pSetLayouts
922 	};
923 
924 	m_descriptorSet = allocateDescriptorSet(vkd, device, &allocateParams);
925 	const VkDescriptorBufferInfo descriptorInfo =
926 	{
927 		m_buffer->object(),	// buffer
928 		0ull,				// offset
929 		bufferSize,			// range
930 	};
931 
932 	DescriptorSetUpdateBuilder()
933 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
934 		.update(vkd, device);
935 
936 	// clear buffer
937 	const std::vector<deUint8>	data((size_t)bufferSize, 0u);
938 	const Allocation&			allocation = m_buffer->getBoundMemory();
939 	void*						allocationData = allocation.getHostPtr();
940 	invalidateAlloc(vkd, device, allocation);
941 	deMemcpy(allocationData, &data[0], (size_t)bufferSize);
942 
943 	const VkBufferMemoryBarrier barrier =
944 	{
945 		VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,					// sType
946 		DE_NULL,													// pNext
947 		VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,		// srcAccessMask
948 		VK_ACCESS_HOST_READ_BIT,									// dstAccessMask
949 		VK_QUEUE_FAMILY_IGNORED,									// srcQueueFamilyIndex
950 		VK_QUEUE_FAMILY_IGNORED,									// destQueueFamilyIndex
951 		m_buffer->object(),											// buffer
952 		0ull,														// offset
953 		bufferSize,													// size
954 	};
955 	m_computeFinishBarrier = barrier;
956 }
957 
958 class ComputeQueryTest : public ComputeQueryTestBase
959 {
960 public:
961 						ComputeQueryTest	(vkt::Context&	context);
962 	tcu::TestStatus		iterate				(void);
963 };
964 
ComputeQueryTest(vkt::Context & context)965 ComputeQueryTest::ComputeQueryTest(vkt::Context& context)
966 	: ComputeQueryTestBase(context)
967 {
968 }
969 
iterate(void)970 tcu::TestStatus ComputeQueryTest::iterate(void)
971 {
972 	const DeviceInterface&			vkd					= m_context.getDeviceInterface();
973 	const VkDevice					device				= m_context.getDevice();
974 	const VkQueue					queue				= m_context.getUniversalQueue();
975 	const CmdPoolCreateInfo			cmdPoolCreateInfo	(m_context.getUniversalQueueFamilyIndex());
976 	const Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
977 	const Unique<VkCommandBuffer>	resetCmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
978 	const Unique<VkCommandBuffer>	cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
979 
980 	initStateObjects();
981 	setupCounters();
982 
983 	vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
984 
985 	if (!acquireProfilingLock())
986 	{
987 		// lock was not acquired in given time, we can't fail the test
988 		return tcu::TestStatus::pass("Pass");
989 	}
990 
991 	beginCommandBuffer(vkd, *resetCmdBuffer);
992 	vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
993 	endCommandBuffer(vkd, *resetCmdBuffer);
994 
995 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
996 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
997 	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
998 
999 	vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
1000 	vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1001 	vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);
1002 
1003 	vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1004 		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1005 	endCommandBuffer(vkd, *cmdBuffer);
1006 
1007 	// submit reset of queries only once
1008 	{
1009 		const VkSubmitInfo submitInfo =
1010 		{
1011 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1012 			DE_NULL,											// pNext
1013 			0u,													// waitSemaphoreCount
1014 			DE_NULL,											// pWaitSemaphores
1015 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1016 			1u,													// commandBufferCount
1017 			&resetCmdBuffer.get(),								// pCommandBuffers
1018 			0u,													// signalSemaphoreCount
1019 			DE_NULL,											// pSignalSemaphores
1020 		};
1021 
1022 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1023 	}
1024 
1025 	// submit command buffer for each pass and wait for its completion
1026 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1027 	{
1028 		const Unique<VkFence> fence(createFence(vkd, device));
1029 
1030 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1031 		{
1032 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1033 			NULL,
1034 			passIndex
1035 		};
1036 
1037 		const VkSubmitInfo submitInfo =
1038 		{
1039 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1040 			&performanceQuerySubmitInfo,						// pNext
1041 			0u,													// waitSemaphoreCount
1042 			DE_NULL,											// pWaitSemaphores
1043 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1044 			1u,													// commandBufferCount
1045 			&cmdBuffer.get(),									// pCommandBuffers
1046 			0u,													// signalSemaphoreCount
1047 			DE_NULL,											// pSignalSemaphores
1048 		};
1049 
1050 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1051 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1052 	}
1053 
1054 	releaseProfilingLock();
1055 
1056 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1057 
1058 	if (verifyQueryResults(*queryPool))
1059 		return tcu::TestStatus::pass("Pass");
1060 	return tcu::TestStatus::fail("Fail");
1061 }
1062 
1063 class ComputeMultiplePoolsTest : public ComputeQueryTestBase
1064 {
1065 public:
1066 					ComputeMultiplePoolsTest	(vkt::Context&	context);
1067 	tcu::TestStatus iterate						(void);
1068 };
1069 
ComputeMultiplePoolsTest(vkt::Context & context)1070 ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context& context)
1071 	: ComputeQueryTestBase(context)
1072 {
1073 }
1074 
iterate(void)1075 tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
1076 {
1077 	if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
1078 		throw tcu::NotSupportedError("MultipleQueryPools not supported");
1079 
1080 	const DeviceInterface&			vkd = m_context.getDeviceInterface();
1081 	const VkDevice					device = m_context.getDevice();
1082 	const VkQueue					queue = m_context.getUniversalQueue();
1083 	const CmdPoolCreateInfo			cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
1084 	const Unique<VkCommandPool>		cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
1085 	const Unique<VkCommandBuffer>	resetCmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1086 	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1087 
1088 	initStateObjects();
1089 	setupCounters();
1090 
1091 	vk::Unique<VkQueryPool>	queryPool1(createQueryPool(0, 2)),
1092 							queryPool2(createQueryPool(1, 2));
1093 
1094 	if (!acquireProfilingLock())
1095 	{
1096 		// lock was not acquired in given time, we can't fail the test
1097 		return tcu::TestStatus::pass("Pass");
1098 	}
1099 
1100 	const VkQueryPool queryPools[] =
1101 	{
1102 		*queryPool1,
1103 		*queryPool2
1104 	};
1105 
1106 	beginCommandBuffer(vkd, *resetCmdBuffer);
1107 	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
1108 	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
1109 	endCommandBuffer(vkd, *resetCmdBuffer);
1110 
1111 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
1112 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
1113 	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
1114 
1115 	// perform two queries
1116 	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
1117 	{
1118 		const VkQueryPool queryPool = queryPools[loop];
1119 		vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
1120 		vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1121 		vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
1122 	}
1123 
1124 	vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1125 		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1126 	endCommandBuffer(vkd, *cmdBuffer);
1127 
1128 	// submit reset of queries only once
1129 	{
1130 		const VkSubmitInfo submitInfo =
1131 		{
1132 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1133 			DE_NULL,											// pNext
1134 			0u,													// waitSemaphoreCount
1135 			DE_NULL,											// pWaitSemaphores
1136 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1137 			1u,													// commandBufferCount
1138 			&resetCmdBuffer.get(),								// pCommandBuffers
1139 			0u,													// signalSemaphoreCount
1140 			DE_NULL,											// pSignalSemaphores
1141 		};
1142 
1143 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1144 	}
1145 
1146 	// submit command buffer for each pass and wait for its completion
1147 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1148 	{
1149 		const Unique<VkFence> fence(createFence(vkd, device));
1150 
1151 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1152 		{
1153 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1154 			NULL,
1155 			passIndex
1156 		};
1157 
1158 		const VkSubmitInfo submitInfo =
1159 		{
1160 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1161 			&performanceQuerySubmitInfo,						// pNext
1162 			0u,													// waitSemaphoreCount
1163 			DE_NULL,											// pWaitSemaphores
1164 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1165 			1u,													// commandBufferCount
1166 			&cmdBuffer.get(),									// pCommandBuffers
1167 			0u,													// signalSemaphoreCount
1168 			DE_NULL,											// pSignalSemaphores
1169 		};
1170 
1171 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1172 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1173 	}
1174 
1175 	releaseProfilingLock();
1176 
1177 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1178 
1179 	if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
1180 		return tcu::TestStatus::pass("Pass");
1181 	return tcu::TestStatus::fail("Fail");
1182 }
1183 
// Selects which test instance QueryPoolPerformanceTest::createInstance() builds.
enum TestType
{
	TT_ENUMERATE_AND_VALIDATE	= 0,	// EnumerateAndValidateTest
	TT_QUERY					= 1,	// GraphicQueryTest / ComputeQueryTest
	TT_MULTIPLE_POOLS			= 2		// GraphicMultiplePoolsTest / ComputeMultiplePoolsTest
};
1190 
1191 class QueryPoolPerformanceTest : public TestCase
1192 {
1193 public:
QueryPoolPerformanceTest(tcu::TestContext & context,TestType testType,VkQueueFlagBits queueFlagBits,const char * name)1194 	QueryPoolPerformanceTest (tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits, const char *name)
1195 		: TestCase			(context, name)
1196 		, m_testType		(testType)
1197 		, m_queueFlagBits	(queueFlagBits)
1198 	{
1199 	}
1200 
createInstance(vkt::Context & context) const1201 	vkt::TestInstance* createInstance (vkt::Context& context) const
1202 	{
1203 		if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1204 			return new EnumerateAndValidateTest(context, m_queueFlagBits);
1205 
1206 		if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
1207 		{
1208 			if (m_testType == TT_QUERY)
1209 				return new GraphicQueryTest(context);
1210 			return new GraphicMultiplePoolsTest(context);
1211 		}
1212 
1213 		// tests for VK_QUEUE_COMPUTE_BIT
1214 		if (m_testType == TT_QUERY)
1215 			return new ComputeQueryTest(context);
1216 		return new ComputeMultiplePoolsTest(context);
1217 	}
1218 
initPrograms(SourceCollections & programCollection) const1219 	void initPrograms (SourceCollections& programCollection) const
1220 	{
1221 		// validation test do not need programs
1222 		if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1223 			return;
1224 
1225 		if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
1226 		{
1227 			programCollection.glslSources.add("comp")
1228 				<< glu::ComputeSource("#version 430\n"
1229 									  "layout (local_size_x = 1) in;\n"
1230 									  "layout(binding = 0) writeonly buffer Output {\n"
1231 									  "		uint values[];\n"
1232 									  "} sb_out;\n\n"
1233 									  "void main (void) {\n"
1234 									  "		uint index = uint(gl_GlobalInvocationID.x);\n"
1235 									  "		sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
1236 									  "}\n");
1237 			return;
1238 		}
1239 
1240 		programCollection.glslSources.add("frag")
1241 			<< glu::FragmentSource("#version 430\n"
1242 								   "layout(location = 0) out vec4 out_FragColor;\n"
1243 								   "void main()\n"
1244 								   "{\n"
1245 								   "	out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
1246 								   "}\n");
1247 
1248 		programCollection.glslSources.add("vert")
1249 			<< glu::VertexSource("#version 430\n"
1250 								 "layout(location = 0) in vec4 in_Position;\n"
1251 								 "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1252 								 "void main() {\n"
1253 								 "	gl_Position  = in_Position;\n"
1254 								 "	gl_PointSize = 1.0;\n"
1255 								 "}\n");
1256 	}
1257 
1258 private:
1259 
1260 	TestType			m_testType;
1261 	VkQueueFlagBits		m_queueFlagBits;
1262 };
1263 
1264 } //anonymous
1265 
QueryPoolPerformanceTests(tcu::TestContext & testCtx)1266 QueryPoolPerformanceTests::QueryPoolPerformanceTests (tcu::TestContext &testCtx)
1267 	: TestCaseGroup(testCtx, "performance_query")
1268 {
1269 }
1270 
init(void)1271 void QueryPoolPerformanceTests::init (void)
1272 {
1273 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT, "enumerate_and_validate_graphic"));
1274 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT,  "enumerate_and_validate_compute"));
1275 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, "query_graphic"));
1276 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, "query_compute"));
1277 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, "multiple_pools_graphic"));
1278 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, "multiple_pools_compute"));
1279 }
1280 
1281 } //QueryPool
1282 } //vkt
1283