1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2018 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Vulkan Performance Query Tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36
37 #include "deMath.h"
38
39 #include "tcuTestLog.hpp"
40 #include "tcuResource.hpp"
41 #include "tcuImageCompare.hpp"
42 #include "vkImageUtil.hpp"
43 #include "tcuCommandLine.hpp"
44 #include "tcuRGBA.hpp"
45
46 namespace vkt
47 {
48 namespace QueryPool
49 {
50 namespace
51 {
52
53 using namespace vk;
54 using namespace Draw;
55
uuidToHex(const deUint8 uuid[])56 std::string uuidToHex(const deUint8 uuid[])
57 {
58 const size_t bytesPerPart[] = {4, 2, 2, 2, 6};
59 const deUint8* ptr = &uuid[0];
60 const size_t stringSize = VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
61 std::string result;
62
63 result.reserve(stringSize);
64
65 for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
66 {
67 const size_t bytesInPart = bytesPerPart[partNdx];
68 const size_t symbolsInPart = 2 * bytesInPart;
69 deUint64 part = 0;
70 std::string partString;
71
72 for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
73 {
74 part = (part << 8) | *ptr;
75 ++ptr;
76 }
77
78 partString = tcu::toHex(part).toString();
79
80 DE_ASSERT(partString.size() > symbolsInPart);
81
82 result += (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
83
84 if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
85 result += '-';
86 }
87
88 DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
89 DE_ASSERT(result.size() == stringSize);
90
91 return result;
92 }
93
// Enumerates the performance counters of every queue family that exposes
// the given queue flag and validates the enumeration API contract
// (VK_INCOMPLETE handling, unique UUIDs, in-range enum values, legal flags).
class EnumerateAndValidateTest : public TestInstance
{
public:
	EnumerateAndValidateTest (vkt::Context& context, VkQueueFlagBits queueFlagBits);
	tcu::TestStatus iterate (void);

protected:
	void basicValidateCounter (const deUint32 familyIndex);

private:
	VkQueueFlagBits m_queueFlagBits;				// capability bit a queue family must expose to be tested
	bool m_requiredExtensionsPresent;				// set by the VK_KHR_performance_query requirement check in the ctor
};
107
// Constructor; requiring VK_KHR_performance_query here makes the test
// report NotSupported before iterate() ever runs on unsupported devices.
EnumerateAndValidateTest::EnumerateAndValidateTest (vkt::Context& context, VkQueueFlagBits queueFlagBits)
	: TestInstance(context)
	, m_queueFlagBits(queueFlagBits)
	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
{
}
114
// For each queue family matching m_queueFlagBits: enumerates its performance
// counters and descriptions, and fails the test on any contract violation:
// missing VK_INCOMPLETE for a short buffer, counter count changing between
// calls, duplicate counter UUIDs, out-of-range scope/storage/unit enums, or
// description flags outside the allowed set.
tcu::TestStatus EnumerateAndValidateTest::iterate (void)
{
	const InstanceInterface& vki = m_context.getInstanceInterface();
	const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
	const std::vector<VkQueueFamilyProperties> queueProperties = getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);

	for (deUint32 queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
	{
		// skip queue families without the requested capability
		if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
			continue;

		// first call with pCounters == NULL only queries the count
		deUint32 counterCount = 0;
		VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCount, DE_NULL, DE_NULL));

		if (counterCount == 0)
			continue;

		{
			std::vector<VkPerformanceCounterKHR> counters (counterCount);
			for (deUint32 i = 0; i < counterCount; ++i)
				counters[i] = vk::initVulkanStructure();

			deUint32 counterCountRead = counterCount;
			std::map<std::string, size_t> uuidValidator;	// UUID hex string -> index of first occurrence

			// requesting fewer counters than available must return VK_INCOMPLETE
			if (counterCount > 1)
			{
				deUint32 incompleteCounterCount = counterCount - 1;
				VkResult result;

				result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
				if (result != VK_INCOMPLETE)
					TCU_FAIL("VK_INCOMPLETE not returned");
			}

			// fetch the full counter list; the count must match the earlier query
			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));

			if (counterCountRead != counterCount)
				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");

			for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
			{
				const VkPerformanceCounterKHR& counter = counters[counterNdx];
				const std::string uuidStr = uuidToHex(counter.uuid);

				// every counter must have a unique UUID within the family
				if (uuidValidator.find(uuidStr) != uuidValidator.end())
					TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
				else
					uuidValidator[uuidStr] = counterNdx;

				// enum fields must be inside the valid ranges of their types
				if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
					TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));

				if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
					TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));

				if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
					TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
			}
		}
		{
			// fetch the counter descriptions and validate their flags
			std::vector<VkPerformanceCounterDescriptionKHR> counterDescriptors (counterCount);
			for (deUint32 i = 0; i < counterCount; ++i)
				counterDescriptors[i] = vk::initVulkanStructure();
			deUint32 counterCountRead = counterCount;

			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));

			if (counterCountRead != counterCount)
				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");

			for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
			{
				const VkPerformanceCounterDescriptionKHR& counterDescriptor = counterDescriptors[counterNdx];
				const VkPerformanceCounterDescriptionFlagsKHR allowedFlags = VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR
																		   | VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;

				if ((counterDescriptor.flags & ~allowedFlags) != 0)
					TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
			}
		}
	}

	return tcu::TestStatus::pass("Pass");
}
200
// Shared functionality for all performance query tests: counter discovery,
// performance query pool creation, profiling lock management and result
// read-back/verification.
class QueryTestBase : public TestInstance
{
public:
	QueryTestBase (vkt::Context& context);

protected:

	void setupCounters (void);
	Move<VkQueryPool> createQueryPool (deUint32 enabledCounterOffset, deUint32 enabledCounterStride);
	bool acquireProfilingLock (void);
	void releaseProfilingLock (void);
	bool verifyQueryResults (VkQueryPool queryPool);
	deUint32 getRequiredNumerOfPasses(void);

private:

	bool m_requiredExtensionsPresent;							// set by the VK_KHR_performance_query requirement check
	deUint32 m_requiredNumerOfPasses;							// passes needed by the last pool created in createQueryPool()
	std::map<deUint64, deUint32> m_enabledCountersCountMap;		// number of counters that were enabled per query pool
	std::vector<VkPerformanceCounterKHR> m_counters;			// counters provided by the device
};
222
// Constructor; requiring VK_KHR_performance_query here makes the test
// report NotSupported early on devices that lack the extension.
QueryTestBase::QueryTestBase(vkt::Context& context)
	: TestInstance (context)
	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
	, m_requiredNumerOfPasses(0)
{
}
229
// Enumerates the performance counters available on the universal queue
// family and caches them in m_counters. Throws NotSupportedError when the
// performanceCounterQueryPools feature is absent or no counters are exposed.
void QueryTestBase::setupCounters()
{
	const InstanceInterface& vki = m_context.getInstanceInterface();
	const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
	const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
	deUint32 queueFamilyIndex = cmdPoolCreateInfo.queueFamilyIndex;
	deUint32 counterCount;		// written by the first enumerate call below

	if (!m_context.getPerformanceQueryFeatures().performanceCounterQueryPools)
		TCU_THROW(NotSupportedError, "Performance counter query pools feature not supported");

	// get the number of supported counters (pCounters == NULL queries the count only)
	VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, NULL, NULL));

	if (!counterCount)
		TCU_THROW(NotSupportedError, "QualityWarning: there are no performance counters");

	// get supported counters
	m_counters.resize(counterCount, vk::initVulkanStructure());
	VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, &m_counters[0], DE_NULL));
}
251
// Creates a single-query performance query pool that enables every
// <enabledCounterStride>-th counter from m_counters, starting after
// <enabledCounterOffset> counters have been skipped. Also records the
// number of submit passes the pool requires (see getRequiredNumerOfPasses)
// and remembers how many counters were enabled for later result read-back.
Move<VkQueryPool> QueryTestBase::createQueryPool(deUint32 enabledCounterOffset, deUint32 enabledCounterStride)
{
	const InstanceInterface& vki = m_context.getInstanceInterface();
	const DeviceInterface& vkd = m_context.getDeviceInterface();
	const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
	const VkDevice device = m_context.getDevice();
	const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
	const deUint32 counterCount = (deUint32)m_counters.size();
	deUint32 enabledIndex = enabledCounterOffset ? 0 : enabledCounterStride;
	std::vector<deUint32> enabledCounters;

	// enable every <enabledCounterStride> counter that has command or render pass scope
	for (deUint32 i = 0; i < counterCount; i++)
	{
		// handle offset
		if (enabledCounterOffset)
		{
			if (enabledCounterOffset == enabledIndex)
			{
				// disable handling offset
				enabledCounterOffset = 0;

				// enable next index per the stride condition
				enabledIndex = enabledCounterStride;
			}
			else
			{
				++enabledIndex;
				continue;
			}
		}

		// handle stride
		if (enabledIndex == enabledCounterStride)
		{
			enabledCounters.push_back(i);
			enabledIndex = 0;
		}
		else
			++enabledIndex;
	}

	// get number of counters that were enabled for this query pool
	deUint32 enabledCountersCount = static_cast<deUint32>(enabledCounters.size());
	if (!enabledCountersCount)
		TCU_THROW(NotSupportedError, "QualityWarning: no performance counters");

	// define performance query
	VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo =
	{
		VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
		NULL,
		cmdPoolCreateInfo.queueFamilyIndex,		// queue family that this performance query is performed on
		enabledCountersCount,					// number of counters to enable
		&enabledCounters[0]						// array of indices of counters to enable
	};

	// get the number of passes counters will require
	vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &m_requiredNumerOfPasses);

	// create query pool; the performance create info is chained via pNext
	VkQueryPoolCreateInfo queryPoolCreateInfo =
	{
		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
		&performanceQueryCreateInfo,
		0,											// flags
		VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,		// new query type
		1,											// queryCount
		0
	};

	Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);

	// memorize number of enabled counters for this query pool
	m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;

	return queryPool;
}
330
acquireProfilingLock()331 bool QueryTestBase::acquireProfilingLock()
332 {
333 const DeviceInterface& vkd = m_context.getDeviceInterface();
334 const VkDevice device = m_context.getDevice();
335
336 // acquire profiling lock before we record command buffers
337 VkAcquireProfilingLockInfoKHR lockInfo =
338 {
339 VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
340 NULL,
341 0,
342 2000000000ull // wait 2s for the lock
343 };
344
345 VkResult result = vkd.acquireProfilingLockKHR(device, &lockInfo);
346 if (result == VK_TIMEOUT)
347 {
348 m_context.getTestContext().getLog() << tcu::TestLog::Message
349 << "Timeout reached, profiling lock wasn't acquired - test had to end earlier"
350 << tcu::TestLog::EndMessage;
351 return false;
352 }
353 if (result != VK_SUCCESS)
354 TCU_FAIL("Profiling lock wasn't acquired");
355
356 return true;
357 }
358
// Releases the device profiling lock acquired by acquireProfilingLock().
void QueryTestBase::releaseProfilingLock()
{
	const DeviceInterface& vkd = m_context.getDeviceInterface();
	const VkDevice device = m_context.getDevice();

	// release the profiling lock after the command buffer is no longer in the pending state
	vkd.releaseProfilingLockKHR(device);
}
367
// Reads back the results of the given performance query pool and returns
// true when they could be retrieved. VK_NOT_READY is tolerated (logged,
// still a pass); only other non-success codes count as failure. The result
// buffer size comes from the per-pool count stored in createQueryPool().
bool QueryTestBase::verifyQueryResults(VkQueryPool queryPool)
{
	const DeviceInterface& vkd = m_context.getDeviceInterface();
	const VkDevice device = m_context.getDevice();

	// create an array to hold the results of all counters
	deUint32 enabledCounterCount = m_enabledCountersCountMap[queryPool.getInternal()];
	std::vector<VkPerformanceCounterResultKHR> recordedCounters(enabledCounterCount);

	// verify that query result can be retrieved
	VkResult result = vkd.getQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
		&recordedCounters[0], sizeof(VkPerformanceCounterResultKHR), VK_QUERY_RESULT_WAIT_BIT);
	if (result == VK_NOT_READY)
	{
		m_context.getTestContext().getLog() << tcu::TestLog::Message
			<< "Pass but result is not ready"
			<< tcu::TestLog::EndMessage;
		return true;
	}
	return (result == VK_SUCCESS);
}
389
// Returns the number of submit passes needed to gather all counters enabled
// by the most recent createQueryPool() call (0 before any pool was created).
deUint32 QueryTestBase::getRequiredNumerOfPasses()
{
	return m_requiredNumerOfPasses;
}
394
// Base class for all graphic tests: owns the render pass, framebuffer,
// graphics pipeline and vertex buffer used to draw a single triangle
// while a performance query is active.
class GraphicQueryTestBase : public QueryTestBase
{
public:
	GraphicQueryTestBase(vkt::Context& context);

protected:
	// Builds the color attachment, render pass, framebuffer, pipeline and vertex buffer.
	void initStateObjects(void);

protected:
	Move<VkPipeline> m_pipeline;
	Move<VkPipelineLayout> m_pipelineLayout;

	de::SharedPtr<Image> m_colorAttachmentImage;
	Move<VkImageView> m_attachmentView;

	Move<VkRenderPass> m_renderPass;
	Move<VkFramebuffer> m_framebuffer;

	de::SharedPtr<Buffer> m_vertexBuffer;

	VkFormat m_colorAttachmentFormat;		// format of the color attachment (RGBA8 unorm)
	deUint32 m_size;						// width and height of the square render target
};
419
// Constructor; fixes the render target to a 32x32 RGBA8 image.
GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context& context)
	: QueryTestBase(context)
	, m_colorAttachmentFormat(VK_FORMAT_R8G8B8A8_UNORM)
	, m_size(32)
{
}
426
// Creates all graphics state used by the draw tests: a 32x32 color
// attachment with view, a single-subpass render pass and framebuffer, a
// fixed-function pipeline with the "vert"/"frag" shaders, and a host-visible
// vertex buffer holding one triangle.
void GraphicQueryTestBase::initStateObjects(void)
{
	const VkDevice device = m_context.getDevice();
	const DeviceInterface& vkd = m_context.getDeviceInterface();

	// attachment images and views
	{
		VkExtent3D imageExtent =
		{
			m_size, // width
			m_size, // height
			1 // depth
		};

		const ImageCreateInfo colorImageCreateInfo(VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1,
												   VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL,
												   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

		m_colorAttachmentImage = Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
													   m_context.getUniversalQueueFamilyIndex());

		const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D, m_colorAttachmentFormat);
		m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
	}

	// renderpass and framebuffer
	{
		RenderPassCreateInfo renderPassCreateInfo;
		renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat, // format
																 VK_SAMPLE_COUNT_1_BIT, // samples
																 VK_ATTACHMENT_LOAD_OP_CLEAR, // loadOp
																 VK_ATTACHMENT_STORE_OP_DONT_CARE, // storeOp
																 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // stencilLoadOp
																 VK_ATTACHMENT_STORE_OP_DONT_CARE, // stencilStoreOp
																 VK_IMAGE_LAYOUT_GENERAL, // initialLayout
																 VK_IMAGE_LAYOUT_GENERAL)); // finalLayout

		const VkAttachmentReference colorAttachmentReference =
		{
			0, // attachment
			VK_IMAGE_LAYOUT_GENERAL // layout
		};

		renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
														   0, // flags
														   0, // inputCount
														   DE_NULL, // pInputAttachments
														   1, // colorCount
														   &colorAttachmentReference, // pColorAttachments
														   DE_NULL, // pResolveAttachments
														   AttachmentReference(), // depthStencilAttachment
														   0, // preserveCount
														   DE_NULL)); // preserveAttachments

		m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);

		std::vector<VkImageView> attachments(1);
		attachments[0] = *m_attachmentView;

		FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
		m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
	}

	// pipeline
	{
		Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
		Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));

		const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;

		// empty layout - the shaders use no descriptors
		const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
		m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);

		// single binding of tightly packed vec4 positions
		const VkVertexInputBindingDescription vf_binding_desc =
		{
			0, // binding
			4 * (deUint32)sizeof(float), // stride
			VK_VERTEX_INPUT_RATE_VERTEX // inputRate
		};

		const VkVertexInputAttributeDescription vf_attribute_desc =
		{
			0, // location
			0, // binding
			VK_FORMAT_R32G32B32A32_SFLOAT, // format
			0 // offset
		};

		const VkPipelineVertexInputStateCreateInfo vf_info =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // sType
			NULL, // pNext
			0u, // flags
			1, // vertexBindingDescriptionCount
			&vf_binding_desc, // pVertexBindingDescriptions
			1, // vertexAttributeDescriptionCount
			&vf_attribute_desc // pVertexAttributeDescriptions
		};

		PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
		pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
		pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
		const VkViewport viewport = makeViewport(m_size, m_size);
		const VkRect2D scissor = makeRect2D(m_size, m_size);
		pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport), std::vector<VkRect2D>(1, scissor)));
		pipelineCreateInfo.addState(PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
		pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
		pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
		pipelineCreateInfo.addState(vf_info);
		m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
	}

	// vertex buffer
	{
		// one triangle in the lower-right quadrant of clip space
		std::vector<tcu::Vec4> vertices(3);
		vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
		vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
		vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);

		const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
		m_vertexBuffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);

		tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4*>(m_vertexBuffer->getBoundMemory().getHostPtr());
		deMemcpy(ptr, &vertices[0], kBufferSize);

		// make the host write visible to the device
		flushAlloc(vkd, device, m_vertexBuffer->getBoundMemory());
	}
}
557
558
// Records a single performance query around one triangle draw and verifies
// that its results can be read back.
class GraphicQueryTest : public GraphicQueryTestBase
{
public:
	GraphicQueryTest (vkt::Context& context);
	tcu::TestStatus iterate (void);
};
565
// Constructor; all state setup is deferred to iterate().
GraphicQueryTest::GraphicQueryTest(vkt::Context& context)
	: GraphicQueryTestBase(context)
{
}
570
iterate(void)571 tcu::TestStatus GraphicQueryTest::iterate(void)
572 {
573 const DeviceInterface& vkd = m_context.getDeviceInterface();
574 const VkDevice device = m_context.getDevice();
575 const VkQueue queue = m_context.getUniversalQueue();
576 const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
577 Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, &cmdPoolCreateInfo));
578 Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
579
580 initStateObjects();
581 setupCounters();
582
583 vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
584
585 if (!acquireProfilingLock())
586 {
587 // lock was not acquired in given time, we can't fail the test
588 return tcu::TestStatus::pass("Pass");
589 }
590
591 // reset query pool
592 {
593 Unique<VkCommandBuffer> resetCmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
594 const Unique<VkFence> fence (createFence(vkd, device));
595 const VkSubmitInfo submitInfo =
596 {
597 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
598 DE_NULL, // pNext
599 0u, // waitSemaphoreCount
600 DE_NULL, // pWaitSemaphores
601 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
602 1u, // commandBufferCount
603 &resetCmdBuffer.get(), // pCommandBuffers
604 0u, // signalSemaphoreCount
605 DE_NULL, // pSignalSemaphores
606 };
607
608 beginCommandBuffer(vkd, *resetCmdBuffer);
609 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
610 endCommandBuffer(vkd, *resetCmdBuffer);
611
612 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
613 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
614 }
615
616 // begin command buffer
617 beginCommandBuffer(vkd, *cmdBuffer, 0u);
618
619 initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
620 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
621
622 // begin render pass
623 VkClearValue renderPassClearValue;
624 deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
625
626 // perform query during triangle draw
627 vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, VK_QUERY_CONTROL_PRECISE_BIT);
628
629 beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
630 makeRect2D(0, 0, m_size, m_size),
631 1, &renderPassClearValue);
632
633 // bind pipeline
634 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
635
636 // bind vertex buffer
637 VkBuffer vertexBuffer = m_vertexBuffer->object();
638 const VkDeviceSize vertexBufferOffset = 0;
639 vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
640
641 vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
642
643 endRenderPass(vkd, *cmdBuffer);
644
645 vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);
646
647 transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
648 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
649 VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
650
651 endCommandBuffer(vkd, *cmdBuffer);
652
653 // submit command buffer for each pass and wait for its completion
654 for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
655 {
656 const Unique<VkFence> fence(createFence(vkd, device));
657
658 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
659 {
660 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
661 NULL,
662 passIndex
663 };
664
665 const VkSubmitInfo submitInfo =
666 {
667 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
668 &performanceQuerySubmitInfo, // pNext
669 0u, // waitSemaphoreCount
670 DE_NULL, // pWaitSemaphores
671 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
672 1u, // commandBufferCount
673 &cmdBuffer.get(), // pCommandBuffers
674 0u, // signalSemaphoreCount
675 DE_NULL, // pSignalSemaphores
676 };
677
678 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
679 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
680 }
681
682 releaseProfilingLock();
683
684 VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
685
686 if (verifyQueryResults(*queryPool))
687 return tcu::TestStatus::pass("Pass");
688 return tcu::TestStatus::fail("Fail");
689 }
690
// Records two performance queries from two different query pools in one
// command buffer (requires the performanceCounterMultipleQueryPools feature)
// and verifies both result sets.
class GraphicMultiplePoolsTest : public GraphicQueryTestBase
{
public:
	GraphicMultiplePoolsTest (vkt::Context& context);
	tcu::TestStatus iterate (void);
};
697
// Constructor; all state setup is deferred to iterate().
GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context& context)
	: GraphicQueryTestBase(context)
{
}
702
iterate(void)703 tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
704 {
705 if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
706 throw tcu::NotSupportedError("MultipleQueryPools not supported");
707
708 const DeviceInterface& vkd = m_context.getDeviceInterface();
709 const VkDevice device = m_context.getDevice();
710 const VkQueue queue = m_context.getUniversalQueue();
711 const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
712 Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, &cmdPoolCreateInfo));
713 Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
714
715 initStateObjects();
716 setupCounters();
717
718 vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)),
719 queryPool2(createQueryPool(1, 2));
720
721 if (!acquireProfilingLock())
722 {
723 // lock was not acquired in given time, we can't fail the test
724 return tcu::TestStatus::pass("Pass");
725 }
726
727 // reset query pools
728 {
729 Unique<VkCommandBuffer> resetCmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
730 const Unique<VkFence> fence (createFence(vkd, device));
731 const VkSubmitInfo submitInfo =
732 {
733 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
734 DE_NULL, // pNext
735 0u, // waitSemaphoreCount
736 DE_NULL, // pWaitSemaphores
737 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
738 1u, // commandBufferCount
739 &resetCmdBuffer.get(), // pCommandBuffers
740 0u, // signalSemaphoreCount
741 DE_NULL, // pSignalSemaphores
742 };
743
744 beginCommandBuffer(vkd, *resetCmdBuffer);
745 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
746 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
747 endCommandBuffer(vkd, *resetCmdBuffer);
748
749 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
750 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
751 }
752
753 // begin command buffer
754 beginCommandBuffer(vkd, *cmdBuffer, 0u);
755
756 initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
757 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
758
759 // begin render pass
760 VkClearValue renderPassClearValue;
761 deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
762
763 VkBuffer vertexBuffer = m_vertexBuffer->object();
764 const VkDeviceSize vertexBufferOffset = 0;
765 const VkQueryPool queryPools[] =
766 {
767 *queryPool1,
768 *queryPool2
769 };
770
771 // perform two queries during triangle draw
772 for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
773 {
774 const VkQueryPool queryPool = queryPools[loop];
775 vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
776 beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
777 makeRect2D(0, 0, m_size, m_size),
778 1, &renderPassClearValue);
779
780 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
781 vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
782 vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
783
784 endRenderPass(vkd, *cmdBuffer);
785 vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
786 }
787
788 transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
789 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
790 VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
791
792 endCommandBuffer(vkd, *cmdBuffer);
793
794 // submit command buffer for each pass and wait for its completion
795 for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
796 {
797 const Unique<VkFence> fence(createFence(vkd, device));
798
799 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
800 {
801 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
802 NULL,
803 passIndex
804 };
805
806 const VkSubmitInfo submitInfo =
807 {
808 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
809 &performanceQuerySubmitInfo, // pNext
810 0u, // waitSemaphoreCount
811 DE_NULL, // pWaitSemaphores
812 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
813 1u, // commandBufferCount
814 &cmdBuffer.get(), // pCommandBuffers
815 0u, // signalSemaphoreCount
816 DE_NULL, // pSignalSemaphores
817 };
818
819 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
820 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
821 }
822
823 releaseProfilingLock();
824
825 VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
826
827 if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
828 return tcu::TestStatus::pass("Pass");
829 return tcu::TestStatus::fail("Fail");
830 }
831
// Base class for all compute tests: owns the compute pipeline, a storage
// buffer with its descriptor set, and the barrier used to make compute
// writes visible before read-back.
class ComputeQueryTestBase : public QueryTestBase
{
public:
	ComputeQueryTestBase(vkt::Context& context);

protected:
	// Builds the compute pipeline, storage buffer, descriptor pool/set and barrier.
	void initStateObjects(void);

protected:
	Move<VkPipeline> m_pipeline;
	Move<VkPipelineLayout> m_pipelineLayout;
	de::SharedPtr<Buffer> m_buffer;
	Move<VkDescriptorPool> m_descriptorPool;
	Move<VkDescriptorSet> m_descriptorSet;
	VkDescriptorBufferInfo m_descriptorBufferInfo;
	VkBufferMemoryBarrier m_computeFinishBarrier;
};
850
// Constructor; all state setup is deferred to initStateObjects().
ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context& context)
	: QueryTestBase(context)
{
}
855
initStateObjects(void)856 void ComputeQueryTestBase::initStateObjects(void)
857 {
858 const DeviceInterface& vkd = m_context.getDeviceInterface();
859 const VkDevice device = m_context.getDevice();
860 const VkDeviceSize bufferSize = 32 * sizeof(deUint32);
861 const CmdPoolCreateInfo cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
862 const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
863 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
864
865 const Unique<VkDescriptorSetLayout> descriptorSetLayout(DescriptorSetLayoutBuilder()
866 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
867 .build(vkd, device));
868
869 // create pipeline layout
870 {
871 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
872 {
873 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
874 DE_NULL, // pNext
875 0u, // flags
876 1u, // setLayoutCount
877 &(*descriptorSetLayout), // pSetLayouts
878 0u, // pushConstantRangeCount
879 DE_NULL, // pPushConstantRanges
880 };
881 m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
882 }
883
884 // create compute pipeline
885 {
886 const Unique<VkShaderModule> cs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
887 const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
888 {
889 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
890 DE_NULL, // pNext
891 (VkPipelineShaderStageCreateFlags)0u, // flags
892 VK_SHADER_STAGE_COMPUTE_BIT, // stage
893 *cs, // module
894 "main", // pName
895 DE_NULL, // pSpecializationInfo
896 };
897 const VkComputePipelineCreateInfo pipelineCreateInfo =
898 {
899 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
900 DE_NULL, // pNext
901 (VkPipelineCreateFlags)0u, // flags
902 pipelineShaderStageParams, // stage
903 *m_pipelineLayout, // layout
904 DE_NULL, // basePipelineHandle
905 0, // basePipelineIndex
906 };
907 m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
908 }
909
910 m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
911 m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
912 m_descriptorPool = DescriptorPoolBuilder()
913 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
914 .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
915 const VkDescriptorSetAllocateInfo allocateParams =
916 {
917 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
918 DE_NULL, // pNext
919 *m_descriptorPool, // descriptorPool
920 1u, // setLayoutCount
921 &(*descriptorSetLayout), // pSetLayouts
922 };
923
924 m_descriptorSet = allocateDescriptorSet(vkd, device, &allocateParams);
925 const VkDescriptorBufferInfo descriptorInfo =
926 {
927 m_buffer->object(), // buffer
928 0ull, // offset
929 bufferSize, // range
930 };
931
932 DescriptorSetUpdateBuilder()
933 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
934 .update(vkd, device);
935
936 // clear buffer
937 const std::vector<deUint8> data((size_t)bufferSize, 0u);
938 const Allocation& allocation = m_buffer->getBoundMemory();
939 void* allocationData = allocation.getHostPtr();
940 invalidateAlloc(vkd, device, allocation);
941 deMemcpy(allocationData, &data[0], (size_t)bufferSize);
942
943 const VkBufferMemoryBarrier barrier =
944 {
945 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
946 DE_NULL, // pNext
947 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, // srcAccessMask
948 VK_ACCESS_HOST_READ_BIT, // dstAccessMask
949 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
950 VK_QUEUE_FAMILY_IGNORED, // destQueueFamilyIndex
951 m_buffer->object(), // buffer
952 0ull, // offset
953 bufferSize, // size
954 };
955 m_computeFinishBarrier = barrier;
956 }
957
958 class ComputeQueryTest : public ComputeQueryTestBase
959 {
960 public:
961 ComputeQueryTest (vkt::Context& context);
962 tcu::TestStatus iterate (void);
963 };
964
ComputeQueryTest(vkt::Context & context)965 ComputeQueryTest::ComputeQueryTest(vkt::Context& context)
966 : ComputeQueryTestBase(context)
967 {
968 }
969
iterate(void)970 tcu::TestStatus ComputeQueryTest::iterate(void)
971 {
972 const DeviceInterface& vkd = m_context.getDeviceInterface();
973 const VkDevice device = m_context.getDevice();
974 const VkQueue queue = m_context.getUniversalQueue();
975 const CmdPoolCreateInfo cmdPoolCreateInfo (m_context.getUniversalQueueFamilyIndex());
976 const Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, &cmdPoolCreateInfo));
977 const Unique<VkCommandBuffer> resetCmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
978 const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
979
980 initStateObjects();
981 setupCounters();
982
983 vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
984
985 if (!acquireProfilingLock())
986 {
987 // lock was not acquired in given time, we can't fail the test
988 return tcu::TestStatus::pass("Pass");
989 }
990
991 beginCommandBuffer(vkd, *resetCmdBuffer);
992 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
993 endCommandBuffer(vkd, *resetCmdBuffer);
994
995 beginCommandBuffer(vkd, *cmdBuffer, 0u);
996 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
997 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
998
999 vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
1000 vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1001 vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);
1002
1003 vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1004 (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1005 endCommandBuffer(vkd, *cmdBuffer);
1006
1007 // submit reset of queries only once
1008 {
1009 const VkSubmitInfo submitInfo =
1010 {
1011 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
1012 DE_NULL, // pNext
1013 0u, // waitSemaphoreCount
1014 DE_NULL, // pWaitSemaphores
1015 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
1016 1u, // commandBufferCount
1017 &resetCmdBuffer.get(), // pCommandBuffers
1018 0u, // signalSemaphoreCount
1019 DE_NULL, // pSignalSemaphores
1020 };
1021
1022 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1023 }
1024
1025 // submit command buffer for each pass and wait for its completion
1026 for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1027 {
1028 const Unique<VkFence> fence(createFence(vkd, device));
1029
1030 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1031 {
1032 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1033 NULL,
1034 passIndex
1035 };
1036
1037 const VkSubmitInfo submitInfo =
1038 {
1039 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
1040 &performanceQuerySubmitInfo, // pNext
1041 0u, // waitSemaphoreCount
1042 DE_NULL, // pWaitSemaphores
1043 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
1044 1u, // commandBufferCount
1045 &cmdBuffer.get(), // pCommandBuffers
1046 0u, // signalSemaphoreCount
1047 DE_NULL, // pSignalSemaphores
1048 };
1049
1050 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1051 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1052 }
1053
1054 releaseProfilingLock();
1055
1056 VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1057
1058 if (verifyQueryResults(*queryPool))
1059 return tcu::TestStatus::pass("Pass");
1060 return tcu::TestStatus::fail("Fail");
1061 }
1062
1063 class ComputeMultiplePoolsTest : public ComputeQueryTestBase
1064 {
1065 public:
1066 ComputeMultiplePoolsTest (vkt::Context& context);
1067 tcu::TestStatus iterate (void);
1068 };
1069
ComputeMultiplePoolsTest(vkt::Context & context)1070 ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context& context)
1071 : ComputeQueryTestBase(context)
1072 {
1073 }
1074
iterate(void)1075 tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
1076 {
1077 if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
1078 throw tcu::NotSupportedError("MultipleQueryPools not supported");
1079
1080 const DeviceInterface& vkd = m_context.getDeviceInterface();
1081 const VkDevice device = m_context.getDevice();
1082 const VkQueue queue = m_context.getUniversalQueue();
1083 const CmdPoolCreateInfo cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
1084 const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
1085 const Unique<VkCommandBuffer> resetCmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1086 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1087
1088 initStateObjects();
1089 setupCounters();
1090
1091 vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)),
1092 queryPool2(createQueryPool(1, 2));
1093
1094 if (!acquireProfilingLock())
1095 {
1096 // lock was not acquired in given time, we can't fail the test
1097 return tcu::TestStatus::pass("Pass");
1098 }
1099
1100 const VkQueryPool queryPools[] =
1101 {
1102 *queryPool1,
1103 *queryPool2
1104 };
1105
1106 beginCommandBuffer(vkd, *resetCmdBuffer);
1107 vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
1108 vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
1109 endCommandBuffer(vkd, *resetCmdBuffer);
1110
1111 beginCommandBuffer(vkd, *cmdBuffer, 0u);
1112 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
1113 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
1114
1115 // perform two queries
1116 for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
1117 {
1118 const VkQueryPool queryPool = queryPools[loop];
1119 vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
1120 vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1121 vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
1122 }
1123
1124 vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1125 (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
1126 endCommandBuffer(vkd, *cmdBuffer);
1127
1128 // submit reset of queries only once
1129 {
1130 const VkSubmitInfo submitInfo =
1131 {
1132 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
1133 DE_NULL, // pNext
1134 0u, // waitSemaphoreCount
1135 DE_NULL, // pWaitSemaphores
1136 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
1137 1u, // commandBufferCount
1138 &resetCmdBuffer.get(), // pCommandBuffers
1139 0u, // signalSemaphoreCount
1140 DE_NULL, // pSignalSemaphores
1141 };
1142
1143 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1144 }
1145
1146 // submit command buffer for each pass and wait for its completion
1147 for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
1148 {
1149 const Unique<VkFence> fence(createFence(vkd, device));
1150
1151 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
1152 {
1153 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
1154 NULL,
1155 passIndex
1156 };
1157
1158 const VkSubmitInfo submitInfo =
1159 {
1160 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
1161 &performanceQuerySubmitInfo, // pNext
1162 0u, // waitSemaphoreCount
1163 DE_NULL, // pWaitSemaphores
1164 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
1165 1u, // commandBufferCount
1166 &cmdBuffer.get(), // pCommandBuffers
1167 0u, // signalSemaphoreCount
1168 DE_NULL, // pSignalSemaphores
1169 };
1170
1171 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1172 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
1173 }
1174
1175 releaseProfilingLock();
1176
1177 VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1178
1179 if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
1180 return tcu::TestStatus::pass("Pass");
1181 return tcu::TestStatus::fail("Fail");
1182 }
1183
1184 enum TestType
1185 {
1186 TT_ENUMERATE_AND_VALIDATE = 0,
1187 TT_QUERY,
1188 TT_MULTIPLE_POOLS
1189 };
1190
1191 class QueryPoolPerformanceTest : public TestCase
1192 {
1193 public:
QueryPoolPerformanceTest(tcu::TestContext & context,TestType testType,VkQueueFlagBits queueFlagBits,const char * name)1194 QueryPoolPerformanceTest (tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits, const char *name)
1195 : TestCase (context, name)
1196 , m_testType (testType)
1197 , m_queueFlagBits (queueFlagBits)
1198 {
1199 }
1200
createInstance(vkt::Context & context) const1201 vkt::TestInstance* createInstance (vkt::Context& context) const
1202 {
1203 if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1204 return new EnumerateAndValidateTest(context, m_queueFlagBits);
1205
1206 if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
1207 {
1208 if (m_testType == TT_QUERY)
1209 return new GraphicQueryTest(context);
1210 return new GraphicMultiplePoolsTest(context);
1211 }
1212
1213 // tests for VK_QUEUE_COMPUTE_BIT
1214 if (m_testType == TT_QUERY)
1215 return new ComputeQueryTest(context);
1216 return new ComputeMultiplePoolsTest(context);
1217 }
1218
initPrograms(SourceCollections & programCollection) const1219 void initPrograms (SourceCollections& programCollection) const
1220 {
1221 // validation test do not need programs
1222 if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1223 return;
1224
1225 if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
1226 {
1227 programCollection.glslSources.add("comp")
1228 << glu::ComputeSource("#version 430\n"
1229 "layout (local_size_x = 1) in;\n"
1230 "layout(binding = 0) writeonly buffer Output {\n"
1231 " uint values[];\n"
1232 "} sb_out;\n\n"
1233 "void main (void) {\n"
1234 " uint index = uint(gl_GlobalInvocationID.x);\n"
1235 " sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
1236 "}\n");
1237 return;
1238 }
1239
1240 programCollection.glslSources.add("frag")
1241 << glu::FragmentSource("#version 430\n"
1242 "layout(location = 0) out vec4 out_FragColor;\n"
1243 "void main()\n"
1244 "{\n"
1245 " out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
1246 "}\n");
1247
1248 programCollection.glslSources.add("vert")
1249 << glu::VertexSource("#version 430\n"
1250 "layout(location = 0) in vec4 in_Position;\n"
1251 "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1252 "void main() {\n"
1253 " gl_Position = in_Position;\n"
1254 " gl_PointSize = 1.0;\n"
1255 "}\n");
1256 }
1257
1258 private:
1259
1260 TestType m_testType;
1261 VkQueueFlagBits m_queueFlagBits;
1262 };
1263
1264 } //anonymous
1265
QueryPoolPerformanceTests(tcu::TestContext & testCtx)1266 QueryPoolPerformanceTests::QueryPoolPerformanceTests (tcu::TestContext &testCtx)
1267 : TestCaseGroup(testCtx, "performance_query")
1268 {
1269 }
1270
init(void)1271 void QueryPoolPerformanceTests::init (void)
1272 {
1273 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT, "enumerate_and_validate_graphic"));
1274 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT, "enumerate_and_validate_compute"));
1275 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, "query_graphic"));
1276 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, "query_compute"));
1277 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, "multiple_pools_graphic"));
1278 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, "multiple_pools_compute"));
1279 }
1280
1281 } //QueryPool
1282 } //vkt
1283