1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Vulkan Performance Query Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26 
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 
37 #include "deMath.h"
38 
39 #include "tcuTestLog.hpp"
40 #include "tcuResource.hpp"
41 #include "tcuImageCompare.hpp"
42 #include "vkImageUtil.hpp"
43 #include "tcuCommandLine.hpp"
44 #include "tcuRGBA.hpp"
45 
46 namespace vkt
47 {
48 namespace QueryPool
49 {
50 namespace
51 {
52 
53 using namespace vk;
54 using namespace Draw;
55 
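// Formats a VK_UUID_SIZE byte array as a hexadecimal string in the canonical 8-4-4-4-12 UUID layout.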
56 std::string uuidToHex(const deUint8 uuid[])
57 {
58 	const size_t	bytesPerPart[]	= {4, 2, 2, 2, 6};
59 	const deUint8*	ptr				= &uuid[0];
60 	const size_t	stringSize		= VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
61 	std::string		result;
62 
63 	result.reserve(stringSize);
64 
65 	for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
66 	{
67 		const size_t	bytesInPart		= bytesPerPart[partNdx];
68 		const size_t	symbolsInPart	= 2 * bytesInPart;
69 		deUint64		part			= 0;
70 		std::string		partString;
71 
72 		for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
73 		{
74 			part = (part << 8) | *ptr;
75 			++ptr;
76 		}
77 
78 		partString	= tcu::toHex(part).toString();
79 
80 		DE_ASSERT(partString.size() > symbolsInPart);
81 
82 		result += (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
83 
84 		if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
85 			result += '-';
86 	}
87 
88 	DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
89 	DE_ASSERT(result.size() == stringSize);
90 
91 	return result;
92 }
93 
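// Enumerates the performance counters of every queue family that matches the requested queue flags and
// validates UUID uniqueness, scope, storage, unit and description flags of each reported counter.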
94 class EnumerateAndValidateTest : public TestInstance
95 {
96 public:
97 						EnumerateAndValidateTest		(vkt::Context&	context, VkQueueFlagBits queueFlagBits);
98 	tcu::TestStatus		iterate							(void);
99 
100 protected:
101 	void				basicValidateCounter			(const deUint32 familyIndex);
102 
103 private:
104 	VkQueueFlagBits		m_queueFlagBits;
105 	bool				m_requiredExtensionsPresent;
106 };
107 
108 EnumerateAndValidateTest::EnumerateAndValidateTest (vkt::Context& context, VkQueueFlagBits queueFlagBits)
109 	: TestInstance(context)
110 	, m_queueFlagBits(queueFlagBits)
111 	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
112 {
113 }
114 
115 tcu::TestStatus EnumerateAndValidateTest::iterate (void)
116 {
117 	const InstanceInterface&					vki				= m_context.getInstanceInterface();
118 	const VkPhysicalDevice						physicalDevice	= m_context.getPhysicalDevice();
119 	const std::vector<VkQueueFamilyProperties>	queueProperties	= getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
120 
121 	for (deUint32 queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
122 	{
123 		if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
124 			continue;
125 
126 		deUint32 counterCount = 0;
127 		VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCount, DE_NULL, DE_NULL));
128 
129 		if (counterCount == 0)
130 			continue;
131 
132 		{
133 			std::vector<VkPerformanceCounterKHR>	counters			(counterCount);
134 			deUint32								counterCountRead	= counterCount;
135 			std::map<std::string, size_t>			uuidValidator;
136 
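			// request one counter less than available to verify that VK_INCOMPLETE is returned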
137 			if (counterCount > 1)
138 			{
139 				deUint32	incompleteCounterCount	= counterCount - 1;
140 				VkResult	result;
141 
142 				result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
143 				if (result != VK_INCOMPLETE)
144 					TCU_FAIL("VK_INCOMPLETE not returned");
145 			}
146 
147 			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));
148 
149 			if (counterCountRead != counterCount)
150 				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
151 
152 			for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
153 			{
154 				const VkPerformanceCounterKHR&	counter			= counters[counterNdx];
155 				const std::string				uuidStr			= uuidToHex(counter.uuid);
156 
157 				if (uuidValidator.find(uuidStr) != uuidValidator.end())
158 					TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
159 				else
160 					uuidValidator[uuidStr] = counterNdx;
161 
162 				if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
163 					TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));
164 
165 				if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
166 					TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));
167 
168 				if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
169 					TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
170 			}
171 		}
172 		{
173 			std::vector<VkPerformanceCounterDescriptionKHR>	counterDescriptors	(counterCount);
174 			deUint32										counterCountRead	= counterCount;
175 
176 			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));
177 
178 			if (counterCountRead != counterCount)
179 				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
180 
181 			for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
182 			{
183 				const VkPerformanceCounterDescriptionKHR&		counterDescriptor	= counterDescriptors[counterNdx];
184 				const VkPerformanceCounterDescriptionFlagsKHR	allowedFlags		= VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR
185 																					| VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;
186 
187 				if ((counterDescriptor.flags & ~allowedFlags) != 0)
188 					TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
189 			}
190 		}
191 	}
192 
193 	return tcu::TestStatus::pass("Pass");
194 }
195 
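// Shared functionality for the query tests: counter enumeration, creation of performance query pools over a
// configurable subset of counters, profiling lock handling and readback of query results.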
196 class QueryTestBase : public TestInstance
197 {
198 public:
199 						QueryTestBase	(vkt::Context&	context);
200 
201 protected:
202 
203 	void				setupCounters			(void);
204 	Move<VkQueryPool>	createQueryPool			(deUint32 enabledCounterOffset, deUint32 enabledCounterStride);
205 	bool				acquireProfilingLock	(void);
206 	void				releaseProfilingLock	(void);
207 	bool				verifyQueryResults		(VkQueryPool queryPool);
208 	deUint32			getRequiredNumerOfPasses(void);
209 
210 private:
211 
212 	bool									m_requiredExtensionsPresent;
213 	deUint32								m_requiredNumerOfPasses;
214 	std::map<deUint64, deUint32>			m_enabledCountersCountMap;		// number of counters that were enabled per query pool
215 	std::vector<VkPerformanceCounterKHR>	m_counters;						// counters provided by the device
216 };
217 
218 QueryTestBase::QueryTestBase(vkt::Context& context)
219 	: TestInstance	(context)
220 	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
221 	, m_requiredNumerOfPasses(0)
222 {
223 }
224 
225 void QueryTestBase::setupCounters()
226 {
227 	const InstanceInterface&	vki					= m_context.getInstanceInterface();
228 	const VkPhysicalDevice		physicalDevice		= m_context.getPhysicalDevice();
229 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
230 	deUint32					queueFamilyIndex	= cmdPoolCreateInfo.queueFamilyIndex;
231 	deUint32					counterCount;
232 
233 	if (!m_context.getPerformanceQueryFeatures().performanceCounterQueryPools)
234 		TCU_THROW(NotSupportedError, "Performance counter query pools feature not supported");
235 
236 	// get the number of supported counters
237 	VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, NULL, NULL));
238 
239 	if (!counterCount)
240 		TCU_THROW(NotSupportedError, "QualityWarning: there are no performance counters");
241 
242 	// get supported counters
243 	m_counters.resize(counterCount);
244 	VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, &m_counters[0], DE_NULL));
245 }
246 
247 Move<VkQueryPool> QueryTestBase::createQueryPool(deUint32 enabledCounterOffset, deUint32 enabledCounterStride)
248 {
249 	const InstanceInterface&	vki					= m_context.getInstanceInterface();
250 	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
251 	const VkPhysicalDevice		physicalDevice		= m_context.getPhysicalDevice();
252 	const VkDevice				device				= m_context.getDevice();
253 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
254 	const deUint32				counterCount		= (deUint32)m_counters.size();
255 	deUint32					enabledIndex		= enabledCounterOffset ? 0 : enabledCounterStride;
256 	std::vector<deUint32>		enabledCounters;
257 
258 	// enable a strided subset of the available counters, optionally skipping the first <enabledCounterOffset> counters
259 	for (deUint32 i = 0; i < counterCount; i++)
260 	{
261 		// handle offset
262 		if (enabledCounterOffset)
263 		{
264 			if (enabledCounterOffset == enabledIndex)
265 			{
266 				// disable handling offset
267 				enabledCounterOffset = 0;
268 
269 				// enable the next index for the stride condition
270 				enabledIndex = enabledCounterStride;
271 			}
272 			else
273 			{
274 				++enabledIndex;
275 				continue;
276 			}
277 		}
278 
279 		// handle stride
280 		if (enabledIndex == enabledCounterStride)
281 		{
282 			enabledCounters.push_back(i);
283 			enabledIndex = 0;
284 		}
285 		else
286 			++enabledIndex;
287 	}
288 
289 	// get number of counters that were enabled for this query pool
290 	deUint32 enabledCountersCount = static_cast<deUint32>(enabledCounters.size());
291 	if (!enabledCountersCount)
292 		TCU_THROW(NotSupportedError, "QualityWarning: no performance counters");
293 
294 	// define performance query
295 	VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo =
296 	{
297 		VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
298 		NULL,
299 		cmdPoolCreateInfo.queueFamilyIndex,			// queue family that this performance query is performed on
300 		enabledCountersCount,						// number of counters to enable
301 		&enabledCounters[0]							// array of indices of counters to enable
302 	};
303 
304 	// get the number of passes counters will require
305 	vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &m_requiredNumerOfPasses);
306 
307 	// create query pool
308 	VkQueryPoolCreateInfo queryPoolCreateInfo =
309 	{
310 		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
311 		&performanceQueryCreateInfo,
312 		0,											// flags
313 		VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,		// new query type
314 		1,											// queryCount
315 		0											// pipelineStatistics
316 	};
317 
318 	Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);
319 
320 	// memorize number of enabled counters for this query pool
321 	m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;
322 
323 	return queryPool;
324 }
325 
326 bool QueryTestBase::acquireProfilingLock()
327 {
328 	const DeviceInterface&		vkd		= m_context.getDeviceInterface();
329 	const VkDevice				device	= m_context.getDevice();
330 
331 	// acquire profiling lock before we record command buffers
332 	VkAcquireProfilingLockInfoKHR lockInfo =
333 	{
334 		VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
335 		NULL,
336 		0,
337 		2000000000ull					// wait 2s for the lock
338 	};
339 
340 	VkResult result = vkd.acquireProfilingLockKHR(device, &lockInfo);
341 	if (result == VK_TIMEOUT)
342 	{
343 		m_context.getTestContext().getLog() << tcu::TestLog::Message
344 			<< "Timeout reached, profiling lock wasn't acquired - test had to end earlier"
345 			<< tcu::TestLog::EndMessage;
346 		return false;
347 	}
348 	if (result != VK_SUCCESS)
349 		TCU_FAIL("Profiling lock wasn't acquired");
350 
351 	return true;
352 }
353 
354 void QueryTestBase::releaseProfilingLock()
355 {
356 	const DeviceInterface&	vkd		= m_context.getDeviceInterface();
357 	const VkDevice			device	= m_context.getDevice();
358 
359 	// release the profiling lock after the command buffer is no longer in the pending state
360 	vkd.releaseProfilingLockKHR(device);
361 }
362 
363 bool QueryTestBase::verifyQueryResults(VkQueryPool queryPool)
364 {
365 	const DeviceInterface&		vkd		= m_context.getDeviceInterface();
366 	const VkDevice				device	= m_context.getDevice();
367 
368 	// create an array to hold the results of all counters
369 	deUint32 enabledCounterCount = m_enabledCountersCountMap[queryPool.getInternal()];
370 	std::vector<VkPerformanceCounterResultKHR> recordedCounters(enabledCounterCount);
371 
372 	// verify that query result can be retrieved
373 	VkResult result = vkd.getQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
374 		&recordedCounters[0], sizeof(VkPerformanceCounterResultKHR), VK_QUERY_RESULT_WAIT_BIT);
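	// a VK_NOT_READY result is tolerated and only reported to the log instead of failing the test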
375 	if (result == VK_NOT_READY)
376 	{
377 		m_context.getTestContext().getLog() << tcu::TestLog::Message
378 			<< "Pass but result is not ready"
379 			<< tcu::TestLog::EndMessage;
380 		return true;
381 	}
382 	return (result == VK_SUCCESS);
383 }
384 
385 deUint32 QueryTestBase::getRequiredNumerOfPasses()
386 {
387 	return m_requiredNumerOfPasses;
388 }
389 
390 // Base class for all graphic tests
391 class GraphicQueryTestBase : public QueryTestBase
392 {
393 public:
394 	GraphicQueryTestBase(vkt::Context&	context);
395 
396 protected:
397 	void initStateObjects(void);
398 
399 protected:
400 	Move<VkPipeline>		m_pipeline;
401 	Move<VkPipelineLayout>	m_pipelineLayout;
402 
403 	de::SharedPtr<Image>	m_colorAttachmentImage;
404 	Move<VkImageView>		m_attachmentView;
405 
406 	Move<VkRenderPass>		m_renderPass;
407 	Move<VkFramebuffer>		m_framebuffer;
408 
409 	de::SharedPtr<Buffer>	m_vertexBuffer;
410 
411 	VkFormat				m_colorAttachmentFormat;
412 	deUint32				m_size;
413 };
414 
415 GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context& context)
416 	: QueryTestBase(context)
417 	, m_colorAttachmentFormat(VK_FORMAT_R8G8B8A8_UNORM)
418 	, m_size(32)
419 {
420 }
421 
422 void GraphicQueryTestBase::initStateObjects(void)
423 {
424 	const VkDevice				device	= m_context.getDevice();
425 	const DeviceInterface&		vkd		= m_context.getDeviceInterface();
426 
427 	//attachment images and views
428 	{
429 		VkExtent3D imageExtent =
430 		{
431 			m_size,		// width
432 			m_size,		// height
433 			1			// depth
434 		};
435 
436 		const ImageCreateInfo colorImageCreateInfo(VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1,
437 												   VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL,
438 												   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
439 
440 		m_colorAttachmentImage = Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
441 													   m_context.getUniversalQueueFamilyIndex());
442 
443 		const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D, m_colorAttachmentFormat);
444 		m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
445 	}
446 
447 	// renderpass and framebuffer
448 	{
449 		RenderPassCreateInfo renderPassCreateInfo;
450 		renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat,				// format
451 																 VK_SAMPLE_COUNT_1_BIT,					// samples
452 																 VK_ATTACHMENT_LOAD_OP_CLEAR,			// loadOp
453 																 VK_ATTACHMENT_STORE_OP_DONT_CARE,		// storeOp
454 																 VK_ATTACHMENT_LOAD_OP_DONT_CARE,		// stencilLoadOp
455 																 VK_ATTACHMENT_STORE_OP_DONT_CARE,		// stencilStoreOp
456 																 VK_IMAGE_LAYOUT_GENERAL,				// initialLayout
457 																 VK_IMAGE_LAYOUT_GENERAL));				// finalLayout
458 
459 		const VkAttachmentReference colorAttachmentReference =
460 		{
461 			0,																							// attachment
462 			VK_IMAGE_LAYOUT_GENERAL																		// layout
463 		};
464 
465 		renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS,				// pipelineBindPoint
466 														   0,											// flags
467 														   0,											// inputCount
468 														   DE_NULL,										// pInputAttachments
469 														   1,											// colorCount
470 														   &colorAttachmentReference,					// pColorAttachments
471 														   DE_NULL,										// pResolveAttachments
472 														   AttachmentReference(),						// depthStencilAttachment
473 														   0,											// preserveCount
474 														   DE_NULL));									// preserveAttachments
475 
476 		m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);
477 
478 		std::vector<VkImageView> attachments(1);
479 		attachments[0] = *m_attachmentView;
480 
481 		FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
482 		m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
483 	}
484 
485 	// pipeline
486 	{
487 		Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
488 		Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));
489 
490 		const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;
491 
492 		const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
493 		m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
494 
495 		const VkVertexInputBindingDescription vf_binding_desc =
496 		{
497 			0,																// binding
498 			4 * (deUint32)sizeof(float),									// stride
499 			VK_VERTEX_INPUT_RATE_VERTEX										// inputRate
500 		};
501 
502 		const VkVertexInputAttributeDescription vf_attribute_desc =
503 		{
504 			0,																// location
505 			0,																// binding
506 			VK_FORMAT_R32G32B32A32_SFLOAT,									// format
507 			0																// offset
508 		};
509 
510 		const VkPipelineVertexInputStateCreateInfo vf_info =
511 		{
512 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// sType
513 			NULL,															// pNext
514 			0u,																// flags
515 			1,																// vertexBindingDescriptionCount
516 			&vf_binding_desc,												// pVertexBindingDescriptions
517 			1,																// vertexAttributeDescriptionCount
518 			&vf_attribute_desc												// pVertexAttributeDescriptions
519 		};
520 
521 		PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
522 		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
523 		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
524 		pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
525 		pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
526 		const VkViewport viewport	= makeViewport(m_size, m_size);
527 		const VkRect2D scissor		= makeRect2D(m_size, m_size);
528 		pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport), std::vector<VkRect2D>(1, scissor)));
529 		pipelineCreateInfo.addState(PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
530 		pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
531 		pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
532 		pipelineCreateInfo.addState(vf_info);
533 		m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
534 	}
535 
536 	// vertex buffer
537 	{
538 		std::vector<tcu::Vec4> vertices(3);
539 		vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
540 		vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
541 		vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);
542 
543 		const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
544 		m_vertexBuffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
545 
546 		tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4*>(m_vertexBuffer->getBoundMemory().getHostPtr());
547 		deMemcpy(ptr, &vertices[0], kBufferSize);
548 
549 		flushAlloc(vkd, device, m_vertexBuffer->getBoundMemory());
550 	}
551 }
552 
553 
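// Records a single performance query around one triangle draw and submits the command buffer once per required counter pass.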
554 class GraphicQueryTest : public GraphicQueryTestBase
555 {
556 public:
557 						GraphicQueryTest	(vkt::Context&	context);
558 	tcu::TestStatus		iterate				(void);
559 };
560 
561 GraphicQueryTest::GraphicQueryTest(vkt::Context& context)
562 	: GraphicQueryTestBase(context)
563 {
564 }
565 
566 tcu::TestStatus GraphicQueryTest::iterate(void)
567 {
568 	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
569 	const VkDevice				device				= m_context.getDevice();
570 	const VkQueue				queue				= m_context.getUniversalQueue();
571 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
572 	Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
573 	Unique<VkCommandBuffer>		cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
574 
575 	initStateObjects();
576 	setupCounters();
577 
578 	vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
579 
580 	if (!acquireProfilingLock())
581 	{
582 		// the lock was not acquired within the given time; this is not treated as a test failure
583 		return tcu::TestStatus::pass("Pass");
584 	}
585 
586 	// reset query pool
587 	{
588 		Unique<VkCommandBuffer>		resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
589 		const Unique<VkFence>		fence			(createFence(vkd, device));
590 		const VkSubmitInfo			submitInfo		=
591 		{
592 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
593 			DE_NULL,											// pNext
594 			0u,													// waitSemaphoreCount
595 			DE_NULL,											// pWaitSemaphores
596 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
597 			1u,													// commandBufferCount
598 			&resetCmdBuffer.get(),								// pCommandBuffers
599 			0u,													// signalSemaphoreCount
600 			DE_NULL,											// pSignalSemaphores
601 		};
602 
603 		beginCommandBuffer(vkd, *resetCmdBuffer);
604 		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
605 		endCommandBuffer(vkd, *resetCmdBuffer);
606 
607 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
608 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
609 	}
610 
611 	// begin command buffer
612 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
613 
614 	initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
615 								  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
616 
617 	// begin render pass
618 	VkClearValue renderPassClearValue;
619 	deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
620 
621 	// perform query during triangle draw
622 	vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, VK_QUERY_CONTROL_PRECISE_BIT);
623 
624 	beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
625 					makeRect2D(0, 0, m_size, m_size),
626 					1, &renderPassClearValue);
627 
628 	// bind pipeline
629 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
630 
631 	// bind vertex buffer
632 	VkBuffer vertexBuffer = m_vertexBuffer->object();
633 	const VkDeviceSize vertexBufferOffset = 0;
634 	vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
635 
636 	vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
637 
638 	endRenderPass(vkd, *cmdBuffer);
639 
640 	vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);
641 
642 	transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
643 					  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
644 					  VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
645 
646 	endCommandBuffer(vkd, *cmdBuffer);
647 
648 	// submit command buffer for each pass and wait for its completion
649 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
650 	{
651 		const Unique<VkFence> fence(createFence(vkd, device));
652 
653 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
654 		{
655 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
656 			NULL,
657 			passIndex
658 		};
659 
660 		const VkSubmitInfo submitInfo =
661 		{
662 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
663 			&performanceQuerySubmitInfo,						// pNext
664 			0u,													// waitSemaphoreCount
665 			DE_NULL,											// pWaitSemaphores
666 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
667 			1u,													// commandBufferCount
668 			&cmdBuffer.get(),									// pCommandBuffers
669 			0u,													// signalSemaphoreCount
670 			DE_NULL,											// pSignalSemaphores
671 		};
672 
673 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
674 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
675 	}
676 
677 	releaseProfilingLock();
678 
679 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
680 
681 	if (verifyQueryResults(*queryPool))
682 		return tcu::TestStatus::pass("Pass");
683 	return tcu::TestStatus::fail("Fail");
684 }
685 
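// Records the same draw twice, once per query pool, with each pool enabling a disjoint subset of the counters;
// requires the performanceCounterMultipleQueryPools feature.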
686 class GraphicMultiplePoolsTest : public GraphicQueryTestBase
687 {
688 public:
689 						GraphicMultiplePoolsTest	(vkt::Context&	context);
690 	tcu::TestStatus		iterate						(void);
691 };
692 
693 GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context& context)
694 	: GraphicQueryTestBase(context)
695 {
696 }
697 
698 tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
699 {
700 	if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
701 		throw tcu::NotSupportedError("MultipleQueryPools not supported");
702 
703 	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
704 	const VkDevice				device				= m_context.getDevice();
705 	const VkQueue				queue				= m_context.getUniversalQueue();
706 	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
707 	Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
708 	Unique<VkCommandBuffer>		cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
709 
710 	initStateObjects();
711 	setupCounters();
712 
713 	vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)),
714 							queryPool2(createQueryPool(1, 2));
715 
716 	if (!acquireProfilingLock())
717 	{
718 		// the lock was not acquired within the given time; this is not treated as a test failure
719 		return tcu::TestStatus::pass("Pass");
720 	}
721 
722 	// reset query pools
723 	{
724 		Unique<VkCommandBuffer>		resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
725 		const Unique<VkFence>		fence			(createFence(vkd, device));
726 		const VkSubmitInfo			submitInfo		=
727 		{
728 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
729 			DE_NULL,											// pNext
730 			0u,													// waitSemaphoreCount
731 			DE_NULL,											// pWaitSemaphores
732 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
733 			1u,													// commandBufferCount
734 			&resetCmdBuffer.get(),								// pCommandBuffers
735 			0u,													// signalSemaphoreCount
736 			DE_NULL,											// pSignalSemaphores
737 		};
738 
739 		beginCommandBuffer(vkd, *resetCmdBuffer);
740 		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
741 		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
742 		endCommandBuffer(vkd, *resetCmdBuffer);
743 
744 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
745 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
746 	}
747 
748 	// begin command buffer
749 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
750 
751 	initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
752 								  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
753 
754 	// begin render pass
755 	VkClearValue renderPassClearValue;
756 	deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
757 
758 	VkBuffer			vertexBuffer		= m_vertexBuffer->object();
759 	const VkDeviceSize	vertexBufferOffset	= 0;
760 	const VkQueryPool	queryPools[]		=
761 	{
762 		*queryPool1,
763 		*queryPool2
764 	};
765 
766 	// perform two queries during triangle draw
767 	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
768 	{
769 		const VkQueryPool queryPool = queryPools[loop];
770 		vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
771 		beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
772 						makeRect2D(0, 0, m_size, m_size),
773 						1, &renderPassClearValue);
774 
775 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
776 		vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
777 		vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
778 
779 		endRenderPass(vkd, *cmdBuffer);
780 		vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
781 	}
782 
783 	transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
784 					  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
785 					  VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
786 
787 	endCommandBuffer(vkd, *cmdBuffer);
788 
789 	// submit command buffer for each pass and wait for its completion
790 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
791 	{
792 		const Unique<VkFence> fence(createFence(vkd, device));
793 
794 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
795 		{
796 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
797 			NULL,
798 			passIndex
799 		};
800 
801 		const VkSubmitInfo submitInfo =
802 		{
803 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
804 			&performanceQuerySubmitInfo,						// pNext
805 			0u,													// waitSemaphoreCount
806 			DE_NULL,											// pWaitSemaphores
807 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
808 			1u,													// commandBufferCount
809 			&cmdBuffer.get(),									// pCommandBuffers
810 			0u,													// signalSemaphoreCount
811 			DE_NULL,											// pSignalSemaphores
812 		};
813 
814 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
815 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
816 	}
817 
818 	releaseProfilingLock();
819 
820 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
821 
822 	if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
823 		return tcu::TestStatus::pass("Pass");
824 	return tcu::TestStatus::fail("Fail");
825 }
826 
827 // Base class for all compute tests
828 class ComputeQueryTestBase : public QueryTestBase
829 {
830 public:
831 	ComputeQueryTestBase(vkt::Context&	context);
832 
833 protected:
834 	void initStateObjects(void);
835 
836 protected:
837 	Move<VkPipeline>		m_pipeline;
838 	Move<VkPipelineLayout>	m_pipelineLayout;
839 	de::SharedPtr<Buffer>	m_buffer;
840 	Move<VkDescriptorPool>	m_descriptorPool;
841 	Move<VkDescriptorSet>	m_descriptorSet;
842 	VkDescriptorBufferInfo	m_descriptorBufferInfo;
843 	VkBufferMemoryBarrier	m_computeFinishBarrier;
844 };
845 
846 ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context& context)
847 	: QueryTestBase(context)
848 {
849 }
850 
851 void ComputeQueryTestBase::initStateObjects(void)
852 {
853 	const DeviceInterface&			vkd = m_context.getDeviceInterface();
854 	const VkDevice					device = m_context.getDevice();
855 	const VkDeviceSize				bufferSize = 32 * sizeof(deUint32);
856 	const CmdPoolCreateInfo			cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
857 	const Unique<VkCommandPool>		cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
858 	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
859 
860 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(DescriptorSetLayoutBuilder()
861 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
862 		.build(vkd, device));
863 
864 	// create pipeline layout
865 	{
866 		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
867 		{
868 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
869 			DE_NULL,													// pNext
870 			0u,															// flags
871 			1u,															// setLayoutCount
872 			&(*descriptorSetLayout),									// pSetLayouts
873 			0u,															// pushConstantRangeCount
874 			DE_NULL,													// pPushConstantRanges
875 		};
876 		m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
877 	}
878 
879 	// create compute pipeline
880 	{
881 		const Unique<VkShaderModule> cs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
882 		const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
883 		{
884 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// sType
885 			DE_NULL,													// pNext
886 			(VkPipelineShaderStageCreateFlags)0u,						// flags
887 			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
888 			*cs,														// module
889 			"main",														// pName
890 			DE_NULL,													// pSpecializationInfo
891 		};
892 		const VkComputePipelineCreateInfo pipelineCreateInfo =
893 		{
894 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,				// sType
895 			DE_NULL,													// pNext
896 			(VkPipelineCreateFlags)0u,									// flags
897 			pipelineShaderStageParams,									// stage
898 			*m_pipelineLayout,											// layout
899 			DE_NULL,													// basePipelineHandle
900 			0,															// basePipelineIndex
901 		};
902 		m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
903 	}
904 
905 	m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
906 		m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
907 	m_descriptorPool = DescriptorPoolBuilder()
908 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
909 		.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
910 	const VkDescriptorSetAllocateInfo allocateParams =
911 	{
912 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,		// sType
913 		DE_NULL,											// pNext
914 		*m_descriptorPool,									// descriptorPool
915 		1u,													// setLayoutCount
916 		&(*descriptorSetLayout),							// pSetLayouts
917 	};
918 
919 	m_descriptorSet = allocateDescriptorSet(vkd, device, &allocateParams);
920 	const VkDescriptorBufferInfo descriptorInfo =
921 	{
922 		m_buffer->object(),	// buffer
923 		0ull,				// offset
924 		bufferSize,			// range
925 	};
926 
927 	DescriptorSetUpdateBuilder()
928 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
929 		.update(vkd, device);
930 
931 	// clear buffer
932 	const std::vector<deUint8>	data((size_t)bufferSize, 0u);
933 	const Allocation&			allocation = m_buffer->getBoundMemory();
934 	void*						allocationData = allocation.getHostPtr();
935 	invalidateAlloc(vkd, device, allocation);
936 	deMemcpy(allocationData, &data[0], (size_t)bufferSize);
937 
938 	const VkBufferMemoryBarrier barrier =
939 	{
940 		VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,					// sType
941 		DE_NULL,													// pNext
942 		VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,		// srcAccessMask
943 		VK_ACCESS_HOST_READ_BIT,									// dstAccessMask
944 		VK_QUEUE_FAMILY_IGNORED,									// srcQueueFamilyIndex
945 		VK_QUEUE_FAMILY_IGNORED,									// dstQueueFamilyIndex
946 		m_buffer->object(),											// buffer
947 		0ull,														// offset
948 		bufferSize,													// size
949 	};
950 	m_computeFinishBarrier = barrier;
951 }
952 
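// Records a single performance query around one compute dispatch and submits the command buffer once per required counter pass.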
953 class ComputeQueryTest : public ComputeQueryTestBase
954 {
955 public:
956 						ComputeQueryTest	(vkt::Context&	context);
957 	tcu::TestStatus		iterate				(void);
958 };
959 
960 ComputeQueryTest::ComputeQueryTest(vkt::Context& context)
961 	: ComputeQueryTestBase(context)
962 {
963 }
964 
965 tcu::TestStatus ComputeQueryTest::iterate(void)
966 {
967 	const DeviceInterface&			vkd					= m_context.getDeviceInterface();
968 	const VkDevice					device				= m_context.getDevice();
969 	const VkQueue					queue				= m_context.getUniversalQueue();
970 	const CmdPoolCreateInfo			cmdPoolCreateInfo	(m_context.getUniversalQueueFamilyIndex());
971 	const Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
972 	const Unique<VkCommandBuffer>	resetCmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
973 	const Unique<VkCommandBuffer>	cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
974 
975 	initStateObjects();
976 	setupCounters();
977 
978 	vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
979 
980 	if (!acquireProfilingLock())
981 	{
982 		// the lock was not acquired within the given time; this is not treated as a test failure
983 		return tcu::TestStatus::pass("Pass");
984 	}
985 
986 	beginCommandBuffer(vkd, *resetCmdBuffer);
987 	vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
988 	endCommandBuffer(vkd, *resetCmdBuffer);
989 
990 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
991 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
992 	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
993 
994 	vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
995 	vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
996 	vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);
997 
998 	vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
999 		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1000 	endCommandBuffer(vkd, *cmdBuffer);
1001 
1002 	// submit reset of queries only once
1003 	{
1004 		const VkSubmitInfo submitInfo =
1005 		{
1006 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1007 			DE_NULL,											// pNext
1008 			0u,													// waitSemaphoreCount
1009 			DE_NULL,											// pWaitSemaphores
1010 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1011 			1u,													// commandBufferCount
1012 			&resetCmdBuffer.get(),								// pCommandBuffers
1013 			0u,													// signalSemaphoreCount
1014 			DE_NULL,											// pSignalSemaphores
1015 		};
1016 
1017 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1018 	}
1019 
1020 	// submit command buffer for each pass and wait for its completion
1021 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1022 	{
1023 		const Unique<VkFence> fence(createFence(vkd, device));
1024 
1025 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1026 		{
1027 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1028 			NULL,
1029 			passIndex
1030 		};
1031 
1032 		const VkSubmitInfo submitInfo =
1033 		{
1034 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1035 			&performanceQuerySubmitInfo,						// pNext
1036 			0u,													// waitSemaphoreCount
1037 			DE_NULL,											// pWaitSemaphores
1038 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1039 			1u,													// commandBufferCount
1040 			&cmdBuffer.get(),									// pCommandBuffers
1041 			0u,													// signalSemaphoreCount
1042 			DE_NULL,											// pSignalSemaphores
1043 		};
1044 
1045 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1046 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1047 	}
1048 
1049 	releaseProfilingLock();
1050 
1051 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1052 
1053 	if (verifyQueryResults(*queryPool))
1054 		return tcu::TestStatus::pass("Pass");
1055 	return tcu::TestStatus::fail("Fail");
1056 }
1057 
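// Compute variant of the multiple-pools test: two query pools over disjoint counter subsets, each wrapping its own
// dispatch in the same command buffer; requires the performanceCounterMultipleQueryPools feature.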
1058 class ComputeMultiplePoolsTest : public ComputeQueryTestBase
1059 {
1060 public:
1061 					ComputeMultiplePoolsTest	(vkt::Context&	context);
1062 	tcu::TestStatus iterate						(void);
1063 };
1064 
1065 ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context& context)
1066 	: ComputeQueryTestBase(context)
1067 {
1068 }
1069 
1070 tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
1071 {
1072 	if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
1073 		throw tcu::NotSupportedError("MultipleQueryPools not supported");
1074 
1075 	const DeviceInterface&			vkd = m_context.getDeviceInterface();
1076 	const VkDevice					device = m_context.getDevice();
1077 	const VkQueue					queue = m_context.getUniversalQueue();
1078 	const CmdPoolCreateInfo			cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
1079 	const Unique<VkCommandPool>		cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
1080 	const Unique<VkCommandBuffer>	resetCmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1081 	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1082 
1083 	initStateObjects();
1084 	setupCounters();
1085 
1086 	vk::Unique<VkQueryPool>	queryPool1(createQueryPool(0, 2)),
1087 							queryPool2(createQueryPool(1, 2));
1088 
1089 	if (!acquireProfilingLock())
1090 	{
1091 		// the lock was not acquired within the given time; this is not treated as a test failure
1092 		return tcu::TestStatus::pass("Pass");
1093 	}
1094 
1095 	const VkQueryPool queryPools[] =
1096 	{
1097 		*queryPool1,
1098 		*queryPool2
1099 	};
1100 
1101 	beginCommandBuffer(vkd, *resetCmdBuffer);
1102 	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
1103 	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
1104 	endCommandBuffer(vkd, *resetCmdBuffer);
1105 
1106 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
1107 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
1108 	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
1109 
1110 	// perform two queries
1111 	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
1112 	{
1113 		const VkQueryPool queryPool = queryPools[loop];
1114 		vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
1115 		vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1116 		vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
1117 	}
1118 
1119 	vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1120 		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1121 	endCommandBuffer(vkd, *cmdBuffer);
1122 
1123 	// submit reset of queries only once
1124 	{
1125 		const VkSubmitInfo submitInfo =
1126 		{
1127 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1128 			DE_NULL,											// pNext
1129 			0u,													// waitSemaphoreCount
1130 			DE_NULL,											// pWaitSemaphores
1131 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1132 			1u,													// commandBufferCount
1133 			&resetCmdBuffer.get(),								// pCommandBuffers
1134 			0u,													// signalSemaphoreCount
1135 			DE_NULL,											// pSignalSemaphores
1136 		};
1137 
1138 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1139 	}
1140 
1141 	// submit command buffer for each pass and wait for its completion
1142 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1143 	{
1144 		const Unique<VkFence> fence(createFence(vkd, device));
1145 
1146 		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1147 		{
1148 			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1149 			NULL,
1150 			passIndex
1151 		};
1152 
1153 		const VkSubmitInfo submitInfo =
1154 		{
1155 			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
1156 			&performanceQuerySubmitInfo,						// pNext
1157 			0u,													// waitSemaphoreCount
1158 			DE_NULL,											// pWaitSemaphores
1159 			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
1160 			1u,													// commandBufferCount
1161 			&cmdBuffer.get(),									// pCommandBuffers
1162 			0u,													// signalSemaphoreCount
1163 			DE_NULL,											// pSignalSemaphores
1164 		};
1165 
1166 		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1167 		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1168 	}
1169 
1170 	releaseProfilingLock();
1171 
1172 	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1173 
1174 	if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
1175 		return tcu::TestStatus::pass("Pass");
1176 	return tcu::TestStatus::fail("Fail");
1177 }
1178 
1179 enum TestType
1180 {
1181 	TT_ENUMERATE_AND_VALIDATE	= 0,
1182 	TT_QUERY,
1183 	TT_MULTIPLE_POOLS
1184 };
1185 
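// Test case factory: selects the TestInstance implementation from the test type and target queue,
// and provides the shaders used by the graphic and compute variants.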
1186 class QueryPoolPerformanceTest : public TestCase
1187 {
1188 public:
1189 	QueryPoolPerformanceTest (tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits, const char *name)
1190 		: TestCase			(context, name, "")
1191 		, m_testType		(testType)
1192 		, m_queueFlagBits	(queueFlagBits)
1193 	{
1194 	}
1195 
1196 	vkt::TestInstance* createInstance (vkt::Context& context) const
1197 	{
1198 		if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1199 			return new EnumerateAndValidateTest(context, m_queueFlagBits);
1200 
1201 		if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
1202 		{
1203 			if (m_testType == TT_QUERY)
1204 				return new GraphicQueryTest(context);
1205 			return new GraphicMultiplePoolsTest(context);
1206 		}
1207 
1208 		// tests for VK_QUEUE_COMPUTE_BIT
1209 		if (m_testType == TT_QUERY)
1210 			return new ComputeQueryTest(context);
1211 		return new ComputeMultiplePoolsTest(context);
1212 	}
1213 
1214 	void initPrograms (SourceCollections& programCollection) const
1215 	{
1216 		// the validation tests do not need any programs
1217 		if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1218 			return;
1219 
1220 		if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
1221 		{
1222 			programCollection.glslSources.add("comp")
1223 				<< glu::ComputeSource("#version 430\n"
1224 									  "layout (local_size_x = 1) in;\n"
1225 									  "layout(binding = 0) writeonly buffer Output {\n"
1226 									  "		uint values[];\n"
1227 									  "} sb_out;\n\n"
1228 									  "void main (void) {\n"
1229 									  "		uint index = uint(gl_GlobalInvocationID.x);\n"
1230 									  "		sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
1231 									  "}\n");
1232 			return;
1233 		}
1234 
1235 		programCollection.glslSources.add("frag")
1236 			<< glu::FragmentSource("#version 430\n"
1237 								   "layout(location = 0) out vec4 out_FragColor;\n"
1238 								   "void main()\n"
1239 								   "{\n"
1240 								   "	out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
1241 								   "}\n");
1242 
1243 		programCollection.glslSources.add("vert")
1244 			<< glu::VertexSource("#version 430\n"
1245 								 "layout(location = 0) in vec4 in_Position;\n"
1246 								 "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1247 								 "void main() {\n"
1248 								 "	gl_Position  = in_Position;\n"
1249 								 "	gl_PointSize = 1.0;\n"
1250 								 "}\n");
1251 	}
1252 
1253 private:
1254 
1255 	TestType			m_testType;
1256 	VkQueueFlagBits		m_queueFlagBits;
1257 };
1258 
1259 } //anonymous
1260 
1261 QueryPoolPerformanceTests::QueryPoolPerformanceTests (tcu::TestContext &testCtx)
1262 	: TestCaseGroup(testCtx, "performance_query", "Tests for performance queries")
1263 {
1264 }
1265 
1266 void QueryPoolPerformanceTests::init (void)
1267 {
1268 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT, "enumerate_and_validate_graphic"));
1269 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT,  "enumerate_and_validate_compute"));
1270 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, "query_graphic"));
1271 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, "query_compute"));
1272 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, "multiple_pools_graphic"));
1273 	addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, "multiple_pools_compute"));
1274 }
1275 
1276 } //QueryPool
1277 } //vkt
1278