1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2018 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Vulkan Performance Query Tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36
37 #include "deMath.h"
38
39 #include "tcuTestLog.hpp"
40 #include "tcuResource.hpp"
41 #include "tcuImageCompare.hpp"
42 #include "vkImageUtil.hpp"
43 #include "tcuCommandLine.hpp"
44 #include "tcuRGBA.hpp"
45
46 namespace vkt
47 {
48 namespace QueryPool
49 {
50 namespace
51 {
52
53 using namespace vk;
54 using namespace Draw;
55
uuidToHex(const deUint8 uuid[])56 std::string uuidToHex(const deUint8 uuid[])
57 {
58 const size_t bytesPerPart[] = {4, 2, 2, 2, 6};
59 const deUint8* ptr = &uuid[0];
60 const size_t stringSize = VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
61 std::string result;
62
63 result.reserve(stringSize);
64
65 for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
66 {
67 const size_t bytesInPart = bytesPerPart[partNdx];
68 const size_t symbolsInPart = 2 * bytesInPart;
69 deUint64 part = 0;
70 std::string partString;
71
72 for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
73 {
74 part = (part << 8) | *ptr;
75 ++ptr;
76 }
77
78 partString = tcu::toHex(part).toString();
79
80 DE_ASSERT(partString.size() > symbolsInPart);
81
82 result += (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
83
84 if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
85 result += '-';
86 }
87
88 DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
89 DE_ASSERT(result.size() == stringSize);
90
91 return result;
92 }
93
// Test instance that enumerates the performance counters exposed by every queue
// family matching the requested queue flags and validates basic properties of
// each counter: unique UUIDs, in-range scope/storage/unit enums, and valid
// description flags.
class EnumerateAndValidateTest : public TestInstance
{
public:
					EnumerateAndValidateTest	(vkt::Context& context, VkQueueFlagBits queueFlagBits);
	tcu::TestStatus	iterate						(void);

protected:
	// NOTE(review): declared but no definition or call site is visible in this chunk.
	void			basicValidateCounter		(const deUint32 familyIndex);

private:
	VkQueueFlagBits	m_queueFlagBits;				// queue families without these flags are skipped by iterate()
	bool			m_requiredExtensionsPresent;	// set from requireDeviceFunctionality() in the constructor
};
107
// Records the queue flags to test and checks for VK_KHR_performance_query support
// up front via requireDeviceFunctionality() (which rejects unsupported devices).
EnumerateAndValidateTest::EnumerateAndValidateTest (vkt::Context& context, VkQueueFlagBits queueFlagBits)
	: TestInstance(context)
	, m_queueFlagBits(queueFlagBits)
	, m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
{
}
114
// For each queue family matching m_queueFlagBits: enumerates the performance
// counters and their descriptions, checks that an undersized output array yields
// VK_INCOMPLETE, that read counts match reported counts, that counter UUIDs are
// unique, and that enum values and description flags are within valid ranges.
tcu::TestStatus EnumerateAndValidateTest::iterate (void)
{
	const InstanceInterface&					vki				= m_context.getInstanceInterface();
	const VkPhysicalDevice						physicalDevice	= m_context.getPhysicalDevice();
	const std::vector<VkQueueFamilyProperties>	queueProperties	= getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);

	for (deUint32 queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
	{
		// only validate queue families that expose the requested capability
		if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
			continue;

		// first call with null output arrays only retrieves the counter count
		deUint32 counterCount = 0;
		VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCount, DE_NULL, DE_NULL));

		if (counterCount == 0)
			continue;

		// validate the counters themselves
		{
			std::vector<VkPerformanceCounterKHR>	counters			(counterCount);
			deUint32								counterCountRead	= counterCount;
			std::map<std::string, size_t>			uuidValidator;

			// an undersized output array must make the implementation return VK_INCOMPLETE
			if (counterCount > 1)
			{
				deUint32	incompleteCounterCount	= counterCount - 1;
				VkResult	result;

				result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
				if (result != VK_INCOMPLETE)
					TCU_FAIL("VK_INCOMPLETE not returned");
			}

			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));

			if (counterCountRead != counterCount)
				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");

			for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
			{
				const VkPerformanceCounterKHR&	counter	= counters[counterNdx];
				const std::string				uuidStr	= uuidToHex(counter.uuid);

				// every counter must have a unique UUID within this queue family
				if (uuidValidator.find(uuidStr) != uuidValidator.end())
					TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
				else
					uuidValidator[uuidStr] = counterNdx;

				// enum members must be within the valid (non-_LAST) range
				if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
					TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));

				if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
					TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));

				if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
					TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
			}
		}
		// validate the counter descriptions
		{
			std::vector<VkPerformanceCounterDescriptionKHR>	counterDescriptors	(counterCount);
			deUint32										counterCountRead	= counterCount;

			VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));

			if (counterCountRead != counterCount)
				TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) + ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");

			for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
			{
				const VkPerformanceCounterDescriptionKHR&		counterDescriptor	= counterDescriptors[counterNdx];
				// only these two flag bits are defined for VkPerformanceCounterDescriptionFlagsKHR
				const VkPerformanceCounterDescriptionFlagsKHR	allowedFlags		= VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR
																					| VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;

				if ((counterDescriptor.flags & ~allowedFlags) != 0)
					TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
			}
		}
	}

	return tcu::TestStatus::pass("Pass");
}
195
// Common base for performance-query execution tests. Provides counter
// enumeration, query pool creation, profiling lock acquisition/release and
// result readback shared by the graphic and compute test variants.
class QueryTestBase : public TestInstance
{
public:
	QueryTestBase	(vkt::Context& context);

protected:

	// Enumerates counters available on the universal queue family into m_counters.
	void				setupCounters			(void);
	// Creates a query pool enabling every <enabledCounterStride>-th counter,
	// starting after <enabledCounterOffset> counters; records the enabled count.
	Move<VkQueryPool>	createQueryPool			(deUint32 enabledCounterOffset, deUint32 enabledCounterStride);
	// Acquires the profiling lock; returns false when the wait timed out.
	bool				acquireProfilingLock	(void);
	void				releaseProfilingLock	(void);
	// Reads back query 0 of the given pool; returns true on success or not-ready.
	bool				verifyQueryResults		(VkQueryPool queryPool);
	// Number of submit passes required by the most recently created query pool.
	deUint32			getRequiredNumerOfPasses(void);

private:

	bool									m_requiredExtensionsPresent;	// set from requireDeviceFunctionality() in the constructor
	deUint32								m_requiredNumerOfPasses;		// filled in by createQueryPool()
	std::map<deUint64, deUint32>			m_enabledCountersCountMap;		// number of counters that were enabled per query pool
	std::vector<VkPerformanceCounterKHR>	m_counters;						// counters provided by the device
};
217
// Checks for VK_KHR_performance_query support up front via
// requireDeviceFunctionality() and zero-initializes the pass counter.
QueryTestBase::QueryTestBase(vkt::Context& context)
	: TestInstance					(context)
	, m_requiredExtensionsPresent	(context.requireDeviceFunctionality("VK_KHR_performance_query"))
	, m_requiredNumerOfPasses		(0)
{
}
224
setupCounters()225 void QueryTestBase::setupCounters()
226 {
227 const InstanceInterface& vki = m_context.getInstanceInterface();
228 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
229 const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
230 deUint32 queueFamilyIndex = cmdPoolCreateInfo.queueFamilyIndex;
231 deUint32 counterCount;
232
233 if (!m_context.getPerformanceQueryFeatures().performanceCounterQueryPools)
234 TCU_THROW(NotSupportedError, "Performance counter query pools feature not supported");
235
236 // get the number of supported counters
237 VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, NULL, NULL));
238
239 if (!counterCount)
240 TCU_THROW(NotSupportedError, "QualityWarning: there are no performance counters");
241
242 // get supported counters
243 m_counters.resize(counterCount);
244 VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex, &counterCount, &m_counters[0], DE_NULL));
245 }
246
// Creates a single-query performance query pool on the universal queue family.
// Counter selection: after skipping the first <enabledCounterOffset> counters,
// every <enabledCounterStride>-th counter is enabled. Also queries and stores
// the number of submit passes the enabled counter set requires, and memorizes
// the enabled-counter count for later result readback.
Move<VkQueryPool> QueryTestBase::createQueryPool(deUint32 enabledCounterOffset, deUint32 enabledCounterStride)
{
	const InstanceInterface&	vki					= m_context.getInstanceInterface();
	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
	const VkPhysicalDevice		physicalDevice		= m_context.getPhysicalDevice();
	const VkDevice				device				= m_context.getDevice();
	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
	const deUint32				counterCount		= (deUint32)m_counters.size();
	// with an offset the index counts up toward the offset first; without one it
	// starts at the stride so the very first counter is enabled immediately
	deUint32					enabledIndex		= enabledCounterOffset ? 0 : enabledCounterStride;
	std::vector<deUint32>		enabledCounters;

	// enable every <enabledCounterStride> counter
	// NOTE(review): the original comment claimed only counters with command or
	// render pass scope are enabled, but no scope check is performed here - confirm
	for (deUint32 i = 0; i < counterCount; i++)
	{
		// handle offset
		if (enabledCounterOffset)
		{
			if (enabledCounterOffset == enabledIndex)
			{
				// disable handling offset
				enabledCounterOffset = 0;

				// enable next index in stride condition
				enabledIndex = enabledCounterStride;
			}
			else
			{
				++enabledIndex;
				continue;
			}
		}

		// handle stride
		if (enabledIndex == enabledCounterStride)
		{
			enabledCounters.push_back(i);
			enabledIndex = 0;
		}
		else
			++enabledIndex;
	}

	// get number of counters that were enabled for this query pool
	deUint32 enabledCountersCount = static_cast<deUint32>(enabledCounters.size());
	if (!enabledCountersCount)
		TCU_THROW(NotSupportedError, "QualityWarning: no performance counters");

	// define performance query
	VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo =
	{
		VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
		NULL,
		cmdPoolCreateInfo.queueFamilyIndex,		// queue family that this performance query is performed on
		enabledCountersCount,					// number of counters to enable
		&enabledCounters[0]						// array of indices of counters to enable
	};

	// get the number of passes counters will require
	vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &m_requiredNumerOfPasses);

	// create query pool (performance create info chained through pNext)
	VkQueryPoolCreateInfo queryPoolCreateInfo =
	{
		VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
		&performanceQueryCreateInfo,
		0,										// flags
		VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,	// new query type
		1,										// queryCount
		0
	};

	Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);

	// memorize number of enabled counters for this query pool
	m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;

	return queryPool;
}
325
acquireProfilingLock()326 bool QueryTestBase::acquireProfilingLock()
327 {
328 const DeviceInterface& vkd = m_context.getDeviceInterface();
329 const VkDevice device = m_context.getDevice();
330
331 // acquire profiling lock before we record command buffers
332 VkAcquireProfilingLockInfoKHR lockInfo =
333 {
334 VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
335 NULL,
336 0,
337 2000000000ull // wait 2s for the lock
338 };
339
340 VkResult result = vkd.acquireProfilingLockKHR(device, &lockInfo);
341 if (result == VK_TIMEOUT)
342 {
343 m_context.getTestContext().getLog() << tcu::TestLog::Message
344 << "Timeout reached, profiling lock wasn't acquired - test had to end earlier"
345 << tcu::TestLog::EndMessage;
346 return false;
347 }
348 if (result != VK_SUCCESS)
349 TCU_FAIL("Profiling lock wasn't acquired");
350
351 return true;
352 }
353
// Releases the profiling lock taken by acquireProfilingLock(). Must be called
// only once the recorded command buffers are no longer in the pending state.
void QueryTestBase::releaseProfilingLock()
{
	const DeviceInterface&	vkd		= m_context.getDeviceInterface();
	const VkDevice			device	= m_context.getDevice();

	// release the profiling lock after the command buffer is no longer in the pending state
	vkd.releaseProfilingLockKHR(device);
}
362
// Reads back the results of query 0 from the given pool into an array sized by
// the number of counters enabled when the pool was created. Returns true when
// the results were retrieved successfully (VK_NOT_READY is also treated as a
// pass and only logged); returns false on any other result code.
bool QueryTestBase::verifyQueryResults(VkQueryPool queryPool)
{
	const DeviceInterface&	vkd		= m_context.getDeviceInterface();
	const VkDevice			device	= m_context.getDevice();

	// create an array to hold the results of all counters
	// (note: map operator[] default-inserts 0 for pools not created through createQueryPool)
	deUint32 enabledCounterCount = m_enabledCountersCountMap[queryPool.getInternal()];
	std::vector<VkPerformanceCounterResultKHR> recordedCounters(enabledCounterCount);

	// verify that query result can be retrieved; WAIT_BIT blocks until available
	VkResult result = vkd.getQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
		&recordedCounters[0], sizeof(VkPerformanceCounterResultKHR), VK_QUERY_RESULT_WAIT_BIT);
	if (result == VK_NOT_READY)
	{
		// not expected together with VK_QUERY_RESULT_WAIT_BIT, but tolerated here
		m_context.getTestContext().getLog() << tcu::TestLog::Message
			<< "Pass but result is not ready"
			<< tcu::TestLog::EndMessage;
		return true;
	}
	return (result == VK_SUCCESS);
}
384
// Returns the number of submit passes (as reported by
// vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR during the most
// recent createQueryPool call) needed to collect all enabled counters.
// (The "Numer" spelling is an existing typo kept for interface compatibility.)
deUint32 QueryTestBase::getRequiredNumerOfPasses()
{
	return m_requiredNumerOfPasses;
}
389
// Base class for all graphic tests: owns the pipeline, render target,
// render pass/framebuffer and vertex buffer shared by the graphics variants.
class GraphicQueryTestBase : public QueryTestBase
{
public:
	GraphicQueryTestBase(vkt::Context& context);

protected:
	// Builds the color attachment, render pass, framebuffer, graphics pipeline
	// and a three-vertex triangle buffer.
	void initStateObjects(void);

protected:
	Move<VkPipeline>		m_pipeline;
	Move<VkPipelineLayout>	m_pipelineLayout;

	de::SharedPtr<Image>	m_colorAttachmentImage;
	Move<VkImageView>		m_attachmentView;

	Move<VkRenderPass>		m_renderPass;
	Move<VkFramebuffer>		m_framebuffer;

	de::SharedPtr<Buffer>	m_vertexBuffer;

	VkFormat				m_colorAttachmentFormat;	// fixed to VK_FORMAT_R8G8B8A8_UNORM by the constructor
	deUint32				m_size;						// square render target edge length in pixels
};
414
// Fixes the render target format (RGBA8 unorm) and size (32x32 pixels).
GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context& context)
	: QueryTestBase				(context)
	, m_colorAttachmentFormat	(VK_FORMAT_R8G8B8A8_UNORM)
	, m_size					(32)
{
}
421
// Creates all GPU state needed by the graphics tests: a 32x32 RGBA8 color
// attachment with view, a single-subpass render pass plus framebuffer, a
// minimal vert/frag pipeline reading vec4 positions, and a host-visible
// vertex buffer holding one triangle.
void GraphicQueryTestBase::initStateObjects(void)
{
	const VkDevice			device	= m_context.getDevice();
	const DeviceInterface&	vkd		= m_context.getDeviceInterface();

	// attachment images and views
	{
		VkExtent3D imageExtent =
		{
			m_size,		// width
			m_size,		// height
			1			// depth
		};

		const ImageCreateInfo colorImageCreateInfo(VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1,
												   VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL,
												   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

		m_colorAttachmentImage = Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
													   m_context.getUniversalQueueFamilyIndex());

		const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D, m_colorAttachmentFormat);
		m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
	}

	// renderpass and framebuffer
	{
		RenderPassCreateInfo renderPassCreateInfo;
		renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat,				// format
																 VK_SAMPLE_COUNT_1_BIT,					// samples
																 VK_ATTACHMENT_LOAD_OP_CLEAR,			// loadOp
																 VK_ATTACHMENT_STORE_OP_DONT_CARE,		// storeOp
																 VK_ATTACHMENT_LOAD_OP_DONT_CARE,		// stencilLoadOp
																 VK_ATTACHMENT_STORE_OP_DONT_CARE,		// stencilStoreOp
																 VK_IMAGE_LAYOUT_GENERAL,				// initialLayout
																 VK_IMAGE_LAYOUT_GENERAL));				// finalLayout

		const VkAttachmentReference colorAttachmentReference =
		{
			0,							// attachment
			VK_IMAGE_LAYOUT_GENERAL		// layout
		};

		renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS,		// pipelineBindPoint
														   0,									// flags
														   0,									// inputCount
														   DE_NULL,								// pInputAttachments
														   1,									// colorCount
														   &colorAttachmentReference,			// pColorAttachments
														   DE_NULL,								// pResolveAttachments
														   AttachmentReference(),				// depthStencilAttachment
														   0,									// preserveCount
														   DE_NULL));							// preserveAttachments

		m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);

		std::vector<VkImageView> attachments(1);
		attachments[0] = *m_attachmentView;

		FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
		m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
	}

	// pipeline
	{
		Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
		Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));

		const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;

		// empty layout - the shaders use no descriptors
		const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
		m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);

		// single vec4 position attribute, tightly packed
		const VkVertexInputBindingDescription vf_binding_desc =
		{
			0,									// binding
			4 * (deUint32)sizeof(float),		// stride
			VK_VERTEX_INPUT_RATE_VERTEX			// inputRate
		};

		const VkVertexInputAttributeDescription vf_attribute_desc =
		{
			0,									// location
			0,									// binding
			VK_FORMAT_R32G32B32A32_SFLOAT,		// format
			0									// offset
		};

		const VkPipelineVertexInputStateCreateInfo vf_info =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// sType
			NULL,														// pNext
			0u,															// flags
			1,															// vertexBindingDescriptionCount
			&vf_binding_desc,											// pVertexBindingDescriptions
			1,															// vertexAttributeDescriptionCount
			&vf_attribute_desc											// pVertexAttributeDescriptions
		};

		PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
		pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
		pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
		pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
		const VkViewport	viewport	= makeViewport(m_size, m_size);
		const VkRect2D		scissor		= makeRect2D(m_size, m_size);
		pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport), std::vector<VkRect2D>(1, scissor)));
		// depth/stencil testing disabled
		pipelineCreateInfo.addState(PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
		pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
		pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
		pipelineCreateInfo.addState(vf_info);
		m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
	}

	// vertex buffer: one triangle in clip space, copied to host-visible memory
	{
		std::vector<tcu::Vec4> vertices(3);
		vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
		vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
		vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);

		const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
		m_vertexBuffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);

		tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4*>(m_vertexBuffer->getBoundMemory().getHostPtr());
		deMemcpy(ptr, &vertices[0], kBufferSize);

		// make the write visible to the device
		flushAlloc(vkd, device, m_vertexBuffer->getBoundMemory());
	}
}
552
553
// Graphics test that records a single performance query around one triangle
// draw and verifies its results can be read back.
class GraphicQueryTest : public GraphicQueryTestBase
{
public:
					GraphicQueryTest	(vkt::Context& context);
	tcu::TestStatus	iterate				(void);
};
560
// All setup is deferred to initStateObjects()/setupCounters() in iterate().
GraphicQueryTest::GraphicQueryTest(vkt::Context& context)
	: GraphicQueryTestBase(context)
{
}
565
iterate(void)566 tcu::TestStatus GraphicQueryTest::iterate(void)
567 {
568 const DeviceInterface& vkd = m_context.getDeviceInterface();
569 const VkDevice device = m_context.getDevice();
570 const VkQueue queue = m_context.getUniversalQueue();
571 const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
572 Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, &cmdPoolCreateInfo));
573 Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
574
575 initStateObjects();
576 setupCounters();
577
578 vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
579
580 if (!acquireProfilingLock())
581 {
582 // lock was not acquired in given time, we can't fail the test
583 return tcu::TestStatus::pass("Pass");
584 }
585
586 // reset query pool
587 {
588 Unique<VkCommandBuffer> resetCmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
589 const Unique<VkFence> fence (createFence(vkd, device));
590 const VkSubmitInfo submitInfo =
591 {
592 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
593 DE_NULL, // pNext
594 0u, // waitSemaphoreCount
595 DE_NULL, // pWaitSemaphores
596 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
597 1u, // commandBufferCount
598 &resetCmdBuffer.get(), // pCommandBuffers
599 0u, // signalSemaphoreCount
600 DE_NULL, // pSignalSemaphores
601 };
602
603 beginCommandBuffer(vkd, *resetCmdBuffer);
604 vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
605 endCommandBuffer(vkd, *resetCmdBuffer);
606
607 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
608 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
609 }
610
611 // begin command buffer
612 beginCommandBuffer(vkd, *cmdBuffer, 0u);
613
614 initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
615 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
616
617 // begin render pass
618 VkClearValue renderPassClearValue;
619 deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
620
621 // perform query during triangle draw
622 vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, VK_QUERY_CONTROL_PRECISE_BIT);
623
624 beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
625 makeRect2D(0, 0, m_size, m_size),
626 1, &renderPassClearValue);
627
628 // bind pipeline
629 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
630
631 // bind vertex buffer
632 VkBuffer vertexBuffer = m_vertexBuffer->object();
633 const VkDeviceSize vertexBufferOffset = 0;
634 vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
635
636 vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
637
638 endRenderPass(vkd, *cmdBuffer);
639
640 vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);
641
642 transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
643 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
644 VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
645
646 endCommandBuffer(vkd, *cmdBuffer);
647
648 // submit command buffer for each pass and wait for its completion
649 for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
650 {
651 const Unique<VkFence> fence(createFence(vkd, device));
652
653 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
654 {
655 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
656 NULL,
657 passIndex
658 };
659
660 const VkSubmitInfo submitInfo =
661 {
662 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
663 &performanceQuerySubmitInfo, // pNext
664 0u, // waitSemaphoreCount
665 DE_NULL, // pWaitSemaphores
666 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
667 1u, // commandBufferCount
668 &cmdBuffer.get(), // pCommandBuffers
669 0u, // signalSemaphoreCount
670 DE_NULL, // pSignalSemaphores
671 };
672
673 VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
674 VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
675 }
676
677 releaseProfilingLock();
678
679 VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
680
681 if (verifyQueryResults(*queryPool))
682 return tcu::TestStatus::pass("Pass");
683 return tcu::TestStatus::fail("Fail");
684 }
685
// Graphics test that records queries from two different performance query
// pools (each enabling a disjoint half of the counters) in one command buffer.
class GraphicMultiplePoolsTest : public GraphicQueryTestBase
{
public:
					GraphicMultiplePoolsTest	(vkt::Context& context);
	tcu::TestStatus	iterate						(void);
};
692
// All setup is deferred to initStateObjects()/setupCounters() in iterate().
GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context& context)
	: GraphicQueryTestBase(context)
{
}
697
// Records two back-to-back performance queries, each from its own pool (the
// pools enable interleaved counter halves: offsets 0 and 1, stride 2), around
// two triangle draws in one command buffer, then submits once per required
// pass and verifies both pools' results. Requires the
// performanceCounterMultipleQueryPools feature.
tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
{
	if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
		throw tcu::NotSupportedError("MultipleQueryPools not supported");

	const DeviceInterface&		vkd					= m_context.getDeviceInterface();
	const VkDevice				device				= m_context.getDevice();
	const VkQueue				queue				= m_context.getUniversalQueue();
	const CmdPoolCreateInfo		cmdPoolCreateInfo	= m_context.getUniversalQueueFamilyIndex();
	Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
	Unique<VkCommandBuffer>		cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	initStateObjects();
	setupCounters();

	// two pools with disjoint, interleaved counter sets
	vk::Unique<VkQueryPool>		queryPool1(createQueryPool(0, 2)),
								queryPool2(createQueryPool(1, 2));

	if (!acquireProfilingLock())
	{
		// lock was not acquired in given time, we can't fail the test
		return tcu::TestStatus::pass("Pass");
	}

	// reset query pools in a separate one-off submission
	{
		Unique<VkCommandBuffer>	resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
		const Unique<VkFence>	fence			(createFence(vkd, device));
		const VkSubmitInfo		submitInfo		=
		{
			VK_STRUCTURE_TYPE_SUBMIT_INFO,				// sType
			DE_NULL,									// pNext
			0u,											// waitSemaphoreCount
			DE_NULL,									// pWaitSemaphores
			(const VkPipelineStageFlags*)DE_NULL,		// pWaitDstStageMask
			1u,											// commandBufferCount
			&resetCmdBuffer.get(),						// pCommandBuffers
			0u,											// signalSemaphoreCount
			DE_NULL,									// pSignalSemaphores
		};

		beginCommandBuffer(vkd, *resetCmdBuffer);
		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
		endCommandBuffer(vkd, *resetCmdBuffer);

		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
	}

	// begin command buffer
	beginCommandBuffer(vkd, *cmdBuffer, 0u);

	initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
								  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);

	// begin render pass
	VkClearValue renderPassClearValue;
	deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));

	VkBuffer			vertexBuffer		= m_vertexBuffer->object();
	const VkDeviceSize	vertexBufferOffset	= 0;
	const VkQueryPool	queryPools[]		=
	{
		*queryPool1,
		*queryPool2
	};

	// perform two queries during triangle draw
	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
	{
		const VkQueryPool queryPool = queryPools[loop];
		// query begins and ends outside the render pass
		vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
		beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer,
						makeRect2D(0, 0, m_size, m_size),
						1, &renderPassClearValue);

		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
		vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
		vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);

		endRenderPass(vkd, *cmdBuffer);
		vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
	}

	transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
					  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
					  VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

	endCommandBuffer(vkd, *cmdBuffer);

	// submit command buffer for each pass and wait for its completion
	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
	{
		const Unique<VkFence> fence(createFence(vkd, device));

		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
		{
			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
			NULL,
			passIndex		// counterPassIndex
		};

		const VkSubmitInfo submitInfo =
		{
			VK_STRUCTURE_TYPE_SUBMIT_INFO,				// sType
			&performanceQuerySubmitInfo,				// pNext
			0u,											// waitSemaphoreCount
			DE_NULL,									// pWaitSemaphores
			(const VkPipelineStageFlags*)DE_NULL,		// pWaitDstStageMask
			1u,											// commandBufferCount
			&cmdBuffer.get(),							// pCommandBuffers
			0u,											// signalSemaphoreCount
			DE_NULL,									// pSignalSemaphores
		};

		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
	}

	releaseProfilingLock();

	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

	// both pools must yield readable results
	if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
		return tcu::TestStatus::pass("Pass");
	return tcu::TestStatus::fail("Fail");
}
826
827 // Base class for all compute tests
// Base class for all compute tests: owns the compute pipeline, its layout,
// a storage buffer with its descriptor set, and the barrier used to make the
// compute results visible.
class ComputeQueryTestBase : public QueryTestBase
{
public:
	ComputeQueryTestBase(vkt::Context& context);

protected:
	// Builds the compute pipeline, storage buffer and descriptor set.
	void initStateObjects(void);

protected:
	Move<VkPipeline>		m_pipeline;
	Move<VkPipelineLayout>	m_pipelineLayout;
	de::SharedPtr<Buffer>	m_buffer;					// storage buffer written by the compute shader
	Move<VkDescriptorPool>	m_descriptorPool;
	Move<VkDescriptorSet>	m_descriptorSet;
	VkDescriptorBufferInfo	m_descriptorBufferInfo;
	VkBufferMemoryBarrier	m_computeFinishBarrier;
};
845
// All setup is deferred to initStateObjects()/setupCounters() in the tests.
ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context& context)
	: QueryTestBase(context)
{
}
850
initStateObjects(void)851 void ComputeQueryTestBase::initStateObjects(void)
852 {
853 const DeviceInterface& vkd = m_context.getDeviceInterface();
854 const VkDevice device = m_context.getDevice();
855 const VkDeviceSize bufferSize = 32 * sizeof(deUint32);
856 const CmdPoolCreateInfo cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
857 const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
858 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
859
860 const Unique<VkDescriptorSetLayout> descriptorSetLayout(DescriptorSetLayoutBuilder()
861 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
862 .build(vkd, device));
863
864 // create pipeline layout
865 {
866 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
867 {
868 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
869 DE_NULL, // pNext
870 0u, // flags
871 1u, // setLayoutCount
872 &(*descriptorSetLayout), // pSetLayouts
873 0u, // pushConstantRangeCount
874 DE_NULL, // pPushConstantRanges
875 };
876 m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
877 }
878
879 // create compute pipeline
880 {
881 const Unique<VkShaderModule> cs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
882 const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
883 {
884 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
885 DE_NULL, // pNext
886 (VkPipelineShaderStageCreateFlags)0u, // flags
887 VK_SHADER_STAGE_COMPUTE_BIT, // stage
888 *cs, // module
889 "main", // pName
890 DE_NULL, // pSpecializationInfo
891 };
892 const VkComputePipelineCreateInfo pipelineCreateInfo =
893 {
894 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
895 DE_NULL, // pNext
896 (VkPipelineCreateFlags)0u, // flags
897 pipelineShaderStageParams, // stage
898 *m_pipelineLayout, // layout
899 DE_NULL, // basePipelineHandle
900 0, // basePipelineIndex
901 };
902 m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
903 }
904
905 m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
906 m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
907 m_descriptorPool = DescriptorPoolBuilder()
908 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
909 .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
910 const VkDescriptorSetAllocateInfo allocateParams =
911 {
912 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
913 DE_NULL, // pNext
914 *m_descriptorPool, // descriptorPool
915 1u, // setLayoutCount
916 &(*descriptorSetLayout), // pSetLayouts
917 };
918
919 m_descriptorSet = allocateDescriptorSet(vkd, device, &allocateParams);
920 const VkDescriptorBufferInfo descriptorInfo =
921 {
922 m_buffer->object(), // buffer
923 0ull, // offset
924 bufferSize, // range
925 };
926
927 DescriptorSetUpdateBuilder()
928 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
929 .update(vkd, device);
930
931 // clear buffer
932 const std::vector<deUint8> data((size_t)bufferSize, 0u);
933 const Allocation& allocation = m_buffer->getBoundMemory();
934 void* allocationData = allocation.getHostPtr();
935 invalidateAlloc(vkd, device, allocation);
936 deMemcpy(allocationData, &data[0], (size_t)bufferSize);
937
938 const VkBufferMemoryBarrier barrier =
939 {
940 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
941 DE_NULL, // pNext
942 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, // srcAccessMask
943 VK_ACCESS_HOST_READ_BIT, // dstAccessMask
944 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
945 VK_QUEUE_FAMILY_IGNORED, // destQueueFamilyIndex
946 m_buffer->object(), // buffer
947 0ull, // offset
948 bufferSize, // size
949 };
950 m_computeFinishBarrier = barrier;
951 }
952
// Records one performance query around a single compute dispatch and
// verifies the collected counter values.
class ComputeQueryTest : public ComputeQueryTestBase
{
public:
						ComputeQueryTest	(vkt::Context& context);
	tcu::TestStatus		iterate				(void);
};
959
ComputeQueryTest(vkt::Context & context)960 ComputeQueryTest::ComputeQueryTest(vkt::Context& context)
961 : ComputeQueryTestBase(context)
962 {
963 }
964
// Records a performance query around one compute dispatch, submits the
// command buffer once per required counter pass, and verifies the results.
tcu::TestStatus ComputeQueryTest::iterate(void)
{
	const DeviceInterface&			vkd = m_context.getDeviceInterface();
	const VkDevice					device = m_context.getDevice();
	const VkQueue					queue = m_context.getUniversalQueue();
	const CmdPoolCreateInfo			cmdPoolCreateInfo (m_context.getUniversalQueueFamilyIndex());
	const Unique<VkCommandPool>		cmdPool (createCommandPool(vkd, device, &cmdPoolCreateInfo));
	const Unique<VkCommandBuffer>	resetCmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
	const Unique<VkCommandBuffer>	cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	initStateObjects();
	setupCounters();

	vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));

	if (!acquireProfilingLock())
	{
		// lock was not acquired in given time, we can't fail the test
		return tcu::TestStatus::pass("Pass");
	}

	// query reset is recorded in its own command buffer so it can be
	// submitted exactly once, while the query itself is replayed per pass
	beginCommandBuffer(vkd, *resetCmdBuffer);
	vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
	endCommandBuffer(vkd, *resetCmdBuffer);

	beginCommandBuffer(vkd, *cmdBuffer, 0u);
	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);

	// the performance query brackets a single 2x2x2 dispatch
	vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
	vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
	vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);

	// make shader writes to the storage buffer visible to host reads
	vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
	endCommandBuffer(vkd, *cmdBuffer);

	// submit reset of queries only once
	{
		const VkSubmitInfo submitInfo =
		{
			VK_STRUCTURE_TYPE_SUBMIT_INFO,				// sType
			DE_NULL,									// pNext
			0u,											// waitSemaphoreCount
			DE_NULL,									// pWaitSemaphores
			(const VkPipelineStageFlags*)DE_NULL,		// pWaitDstStageMask
			1u,											// commandBufferCount
			&resetCmdBuffer.get(),						// pCommandBuffers
			0u,											// signalSemaphoreCount
			DE_NULL,									// pSignalSemaphores
		};

		// no fence needed: queue submission order guarantees the reset
		// executes before the query submissions below on the same queue
		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
	}

	// submit command buffer for each pass and wait for its completion
	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
	{
		const Unique<VkFence> fence(createFence(vkd, device));

		// counterPassIndex selects which counters are collected this pass
		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
		{
			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
			NULL,
			passIndex
		};

		const VkSubmitInfo submitInfo =
		{
			VK_STRUCTURE_TYPE_SUBMIT_INFO,				// sType
			&performanceQuerySubmitInfo,				// pNext
			0u,											// waitSemaphoreCount
			DE_NULL,									// pWaitSemaphores
			(const VkPipelineStageFlags*)DE_NULL,		// pWaitDstStageMask
			1u,											// commandBufferCount
			&cmdBuffer.get(),							// pCommandBuffers
			0u,											// signalSemaphoreCount
			DE_NULL,									// pSignalSemaphores
		};

		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
	}

	releaseProfilingLock();

	// reset the command buffer recorded under the profiling lock before
	// reading back results
	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

	if (verifyQueryResults(*queryPool))
		return tcu::TestStatus::pass("Pass");
	return tcu::TestStatus::fail("Fail");
}
1057
// Same as ComputeQueryTest but records queries from two different
// performance query pools into a single command buffer (requires the
// performanceCounterMultipleQueryPools feature).
class ComputeMultiplePoolsTest : public ComputeQueryTestBase
{
public:
						ComputeMultiplePoolsTest	(vkt::Context& context);
	tcu::TestStatus		iterate						(void);
};
1064
ComputeMultiplePoolsTest(vkt::Context & context)1065 ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context& context)
1066 : ComputeQueryTestBase(context)
1067 {
1068 }
1069
// Records one performance query from each of two query pools into the same
// command buffer, submits once per required counter pass, and verifies both
// pools' results.
tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
{
	if (!m_context.getPerformanceQueryFeatures().performanceCounterMultipleQueryPools)
		throw tcu::NotSupportedError("MultipleQueryPools not supported");

	const DeviceInterface&			vkd = m_context.getDeviceInterface();
	const VkDevice					device = m_context.getDevice();
	const VkQueue					queue = m_context.getUniversalQueue();
	const CmdPoolCreateInfo			cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
	const Unique<VkCommandPool>		cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
	const Unique<VkCommandBuffer>	resetCmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	initStateObjects();
	setupCounters();

	// two pools; arguments are presumably (pool index, number of pools) — see createQueryPool in the shared base class
	vk::Unique<VkQueryPool>			queryPool1(createQueryPool(0, 2)),
									queryPool2(createQueryPool(1, 2));

	if (!acquireProfilingLock())
	{
		// lock was not acquired in given time, we can't fail the test
		return tcu::TestStatus::pass("Pass");
	}

	const VkQueryPool queryPools[] =
	{
		*queryPool1,
		*queryPool2
	};

	// query resets are recorded in their own command buffer so they can be
	// submitted exactly once, while the queries are replayed per pass
	beginCommandBuffer(vkd, *resetCmdBuffer);
	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
	endCommandBuffer(vkd, *resetCmdBuffer);

	beginCommandBuffer(vkd, *cmdBuffer, 0u);
	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);

	// perform two queries
	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
	{
		const VkQueryPool queryPool = queryPools[loop];
		vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
		vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
		vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
	}

	// make shader writes to the storage buffer visible to host reads
	vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
	endCommandBuffer(vkd, *cmdBuffer);

	// submit reset of queries only once
	{
		const VkSubmitInfo submitInfo =
		{
			VK_STRUCTURE_TYPE_SUBMIT_INFO,				// sType
			DE_NULL,									// pNext
			0u,											// waitSemaphoreCount
			DE_NULL,									// pWaitSemaphores
			(const VkPipelineStageFlags*)DE_NULL,		// pWaitDstStageMask
			1u,											// commandBufferCount
			&resetCmdBuffer.get(),						// pCommandBuffers
			0u,											// signalSemaphoreCount
			DE_NULL,									// pSignalSemaphores
		};

		// no fence needed: queue submission order guarantees the resets
		// execute before the query submissions below on the same queue
		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
	}

	// submit command buffer for each pass and wait for its completion
	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
	{
		const Unique<VkFence> fence(createFence(vkd, device));

		// counterPassIndex selects which counters are collected this pass
		VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo =
		{
			VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
			NULL,
			passIndex
		};

		const VkSubmitInfo submitInfo =
		{
			VK_STRUCTURE_TYPE_SUBMIT_INFO,				// sType
			&performanceQuerySubmitInfo,				// pNext
			0u,											// waitSemaphoreCount
			DE_NULL,									// pWaitSemaphores
			(const VkPipelineStageFlags*)DE_NULL,		// pWaitDstStageMask
			1u,											// commandBufferCount
			&cmdBuffer.get(),							// pCommandBuffers
			0u,											// signalSemaphoreCount
			DE_NULL,									// pSignalSemaphores
		};

		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
	}

	releaseProfilingLock();

	// reset the command buffer recorded under the profiling lock before
	// reading back results
	VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

	if (verifyQueryResults(*queryPool1) && verifyQueryResults(*queryPool2))
		return tcu::TestStatus::pass("Pass");
	return tcu::TestStatus::fail("Fail");
}
1178
// Selects which TestInstance QueryPoolPerformanceTest creates.
enum TestType
{
	TT_ENUMERATE_AND_VALIDATE = 0,	// enumerate available counters and validate their properties only
	TT_QUERY,						// run a single performance query pool around a draw/dispatch
	TT_MULTIPLE_POOLS				// record queries from two pools in one command buffer
};
1185
1186 class QueryPoolPerformanceTest : public TestCase
1187 {
1188 public:
QueryPoolPerformanceTest(tcu::TestContext & context,TestType testType,VkQueueFlagBits queueFlagBits,const char * name)1189 QueryPoolPerformanceTest (tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits, const char *name)
1190 : TestCase (context, name, "")
1191 , m_testType (testType)
1192 , m_queueFlagBits (queueFlagBits)
1193 {
1194 }
1195
createInstance(vkt::Context & context) const1196 vkt::TestInstance* createInstance (vkt::Context& context) const
1197 {
1198 if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1199 return new EnumerateAndValidateTest(context, m_queueFlagBits);
1200
1201 if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
1202 {
1203 if (m_testType == TT_QUERY)
1204 return new GraphicQueryTest(context);
1205 return new GraphicMultiplePoolsTest(context);
1206 }
1207
1208 // tests for VK_QUEUE_COMPUTE_BIT
1209 if (m_testType == TT_QUERY)
1210 return new ComputeQueryTest(context);
1211 return new ComputeMultiplePoolsTest(context);
1212 }
1213
initPrograms(SourceCollections & programCollection) const1214 void initPrograms (SourceCollections& programCollection) const
1215 {
1216 // validation test do not need programs
1217 if (m_testType == TT_ENUMERATE_AND_VALIDATE)
1218 return;
1219
1220 if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
1221 {
1222 programCollection.glslSources.add("comp")
1223 << glu::ComputeSource("#version 430\n"
1224 "layout (local_size_x = 1) in;\n"
1225 "layout(binding = 0) writeonly buffer Output {\n"
1226 " uint values[];\n"
1227 "} sb_out;\n\n"
1228 "void main (void) {\n"
1229 " uint index = uint(gl_GlobalInvocationID.x);\n"
1230 " sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
1231 "}\n");
1232 return;
1233 }
1234
1235 programCollection.glslSources.add("frag")
1236 << glu::FragmentSource("#version 430\n"
1237 "layout(location = 0) out vec4 out_FragColor;\n"
1238 "void main()\n"
1239 "{\n"
1240 " out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
1241 "}\n");
1242
1243 programCollection.glslSources.add("vert")
1244 << glu::VertexSource("#version 430\n"
1245 "layout(location = 0) in vec4 in_Position;\n"
1246 "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1247 "void main() {\n"
1248 " gl_Position = in_Position;\n"
1249 " gl_PointSize = 1.0;\n"
1250 "}\n");
1251 }
1252
1253 private:
1254
1255 TestType m_testType;
1256 VkQueueFlagBits m_queueFlagBits;
1257 };
1258
1259 } //anonymous
1260
QueryPoolPerformanceTests(tcu::TestContext & testCtx)1261 QueryPoolPerformanceTests::QueryPoolPerformanceTests (tcu::TestContext &testCtx)
1262 : TestCaseGroup(testCtx, "performance_query", "Tests for performance queries")
1263 {
1264 }
1265
init(void)1266 void QueryPoolPerformanceTests::init (void)
1267 {
1268 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT, "enumerate_and_validate_graphic"));
1269 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT, "enumerate_and_validate_compute"));
1270 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, "query_graphic"));
1271 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, "query_compute"));
1272 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, "multiple_pools_graphic"));
1273 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, "multiple_pools_compute"));
1274 }
1275
1276 } //QueryPool
1277 } //vkt
1278