// Copyright 2016-2021 The Khronos Group, Inc.
//
// SPDX-License-Identifier: CC-BY-4.0

include::{generated}/meta/{refprefix}VK_KHR_performance_query.txt[]

=== Other Extension Metadata

*Last Modified Date*::
    2019-10-08
*IP Status*::
    No known IP claims.
*Contributors*::
  - Jesse Barker, Unity Technologies
  - Kenneth Benzie, Codeplay
  - Jan-Harald Fredriksen, ARM
  - Jeff Leger, Qualcomm
  - Jesse Hall, Google
  - Tobias Hector, AMD
  - Neil Henning, Codeplay
  - Baldur Karlsson
  - Lionel Landwerlin, Intel
  - Peter Lohrmann, AMD
  - Alon Or-bach, Samsung
  - Daniel Rakos, AMD
  - Niklas Smedberg, Unity Technologies
  - Igor Ostrowski, Intel

=== Description

The `VK_KHR_performance_query` extension adds a mechanism to allow querying
of performance counters for use in applications and by profiling tools.

Each queue family may: expose counters that can: be enabled on a queue of
that family.
We extend elink:VkQueryType to add a new query type for performance queries,
and chain a structure on slink:VkQueryPoolCreateInfo to specify the
performance queries to enable.

include::{generated}/interfaces/VK_KHR_performance_query.txt[]

=== Issues

1) Should this extension include a mechanism to begin a query in command
buffer _A_ and end the query in command buffer _B_?

*RESOLVED* No - queries are tied to command buffer creation and thus have to
be encapsulated within a single command buffer.

2) Should this extension include a mechanism to begin and end queries
globally on the queue, not using the existing command buffer commands?

*RESOLVED* No - for the same reason as the resolution of issue 1).

3) Should this extension expose counters that require multiple passes?

*RESOLVED* Yes - users should re-submit a command buffer with the same
commands in it multiple times, specifying the pass to count as the query
parameter in slink:VkPerformanceQuerySubmitInfoKHR.

4) How to handle counters across parallel workloads?

*RESOLVED* In the spirit of Vulkan, a counter description flag
ename:VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR
denotes that the accuracy of a counter result is affected by parallel
workloads.

5) How to handle secondary command buffers?

*RESOLVED* Secondary command buffers inherit any counter pass index
specified in the parent primary command buffer.
Note: this is no longer an issue after the change made by the resolution of
issue 10.

6) What commands does the profiling lock have to be held for?

*RESOLVED* For any command buffer that is being queried with a performance
query pool, the profiling lock must: be held while that command buffer is in
the _recording_, _executable_, or _pending_ state.

7) Should we support flink:vkCmdCopyQueryPoolResults?

*RESOLVED* Yes.

8) Should we allow performance queries to interact with multiview?

*RESOLVED* Yes, but the performance queries must be performed once for each
pass per view.

9) Should a `queryCount > 1` be usable for performance queries?

*RESOLVED* Yes.
Some vendors will have costly performance counter query pool creation and,
if a certain set of counters is to be used multiple times, would rather
that a `queryCount > 1` can be used to amortize the instantiation cost.

10) Should we introduce an indirect mechanism to set the counter pass index?

*RESOLVED* Specify the counter pass index at submit time instead, to avoid
requiring re-recording of command buffers when multiple counter passes are
needed.


=== Examples

The following example shows how to find what performance counters a queue
family supports, set up a query pool to record these performance counters,
how to add the query pool to the command buffer to record information, and
how to get the results from the query pool.
109 110[source,c++] 111-------------------------------------- 112// A previously created physical device 113VkPhysicalDevice physicalDevice; 114 115// One of the queue families our device supports 116uint32_t queueFamilyIndex; 117 118uint32_t counterCount; 119 120// Get the count of counters supported 121vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( 122 physicalDevice, 123 queueFamilyIndex, 124 &counterCount, 125 NULL, 126 NULL); 127 128VkPerformanceCounterKHR* counters = 129 malloc(sizeof(VkPerformanceCounterKHR) * counterCount); 130VkPerformanceCounterDescriptionKHR* counterDescriptions = 131 malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount); 132 133// Get the counters supported 134vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( 135 physicalDevice, 136 queueFamilyIndex, 137 &counterCount, 138 counters, 139 counterDescriptions); 140 141// Try to enable the first 8 counters 142uint32_t enabledCounters[8]; 143 144const uint32_t enabledCounterCount = min(counterCount, 8)); 145 146for (uint32_t i = 0; i < enabledCounterCount; i++) { 147 enabledCounters[i] = i; 148} 149 150// A previously created device that had the performanceCounterQueryPools feature 151// set to VK_TRUE 152VkDevice device; 153 154VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = { 155 VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, 156 NULL, 157 158 // Specify the queue family that this performance query is performed on 159 queueFamilyIndex, 160 161 // The number of counters to enable 162 enabledCounterCount, 163 164 // The array of indices of counters to enable 165 enabledCounters 166}; 167 168 169// Get the number of passes our counters will require. 
170uint32_t numPasses; 171 172vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( 173 physicalDevice, 174 &performanceQueryCreateInfo, 175 &numPasses); 176 177VkQueryPoolCreateInfo queryPoolCreateInfo = { 178 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 179 &performanceQueryCreateInfo, 180 0, 181 182 // Using our new query type here 183 VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, 184 185 1, 186 187 0 188}; 189 190VkQueryPool queryPool; 191 192VkResult result = vkCreateQueryPool( 193 device, 194 &queryPoolCreateInfo, 195 NULL, 196 &queryPool); 197 198assert(VK_SUCCESS == result); 199 200// A queue from queueFamilyIndex 201VkQueue queue; 202 203// A command buffer we want to record counters on 204VkCommandBuffer commandBuffer; 205 206VkCommandBufferBeginInfo commandBufferBeginInfo = { 207 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 208 NULL, 209 0, 210 NULL 211}; 212 213VkAcquireProfilingLockInfoKHR lockInfo = { 214 VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR, 215 NULL, 216 0, 217 UINT64_MAX // Wait forever for the lock 218}; 219 220// Acquire the profiling lock before we record command buffers 221// that will use performance queries 222 223result = vkAcquireProfilingLockKHR(device, &lockInfo); 224 225assert(VK_SUCCESS == result); 226 227result = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo); 228 229assert(VK_SUCCESS == result); 230 231vkCmdResetQueryPool( 232 commandBuffer, 233 queryPool, 234 0, 235 1); 236 237vkCmdBeginQuery( 238 commandBuffer, 239 queryPool, 240 0, 241 0); 242 243// Perform the commands you want to get performance information on 244// ... 
245 246// Perform a barrier to ensure all previous commands were complete before 247// ending the query 248vkCmdPipelineBarrier(commandBuffer, 249 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 250 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 251 0, 252 0, 253 NULL, 254 0, 255 NULL, 256 0, 257 NULL); 258 259vkCmdEndQuery( 260 commandBuffer, 261 queryPool, 262 0); 263 264result = vkEndCommandBuffer(commandBuffer); 265 266assert(VK_SUCCESS == result); 267 268for (uint32_t counterPass = 0; counterPass < numPasses; counterPass++) { 269 270 VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = { 271 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, 272 NULL, 273 counterPass 274 }; 275 276 277 // Submit the command buffer and wait for its completion 278 // ... 279} 280 281// Release the profiling lock after the command buffer is no longer in the 282// pending state. 283vkReleaseProfilingLockKHR(device); 284 285result = vkResetCommandBuffer(commandBuffer, 0); 286 287assert(VK_SUCCESS == result); 288 289// Create an array to hold the results of all counters 290VkPerformanceCounterResultKHR* recordedCounters = malloc( 291 sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount); 292 293result = vkGetQueryPoolResults( 294 device, 295 queryPool, 296 0, 297 1, 298 sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount, 299 recordedCounters, 300 sizeof(VkPerformanceCounterResultKHR), 301 NULL); 302 303// recordedCounters is filled with our counters, we will look at one for posterity 304switch (counters[0].storage) { 305 case VK_PERFORMANCE_COUNTER_STORAGE_INT32: 306 // use recordCounters[0].int32 to get at the counter result! 307 break; 308 case VK_PERFORMANCE_COUNTER_STORAGE_INT64: 309 // use recordCounters[0].int64 to get at the counter result! 310 break; 311 case VK_PERFORMANCE_COUNTER_STORAGE_UINT32: 312 // use recordCounters[0].uint32 to get at the counter result! 
313 break; 314 case VK_PERFORMANCE_COUNTER_STORAGE_UINT64: 315 // use recordCounters[0].uint64 to get at the counter result! 316 break; 317 case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32: 318 // use recordCounters[0].float32 to get at the counter result! 319 break; 320 case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64: 321 // use recordCounters[0].float64 to get at the counter result! 322 break; 323} 324-------------------------------------- 325 326=== Version History 327 328 * Revision 1, 2019-10-08 329