• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2016-2024 The Khronos Group Inc.
2//
3// SPDX-License-Identifier: CC-BY-4.0
4
5include::{generated}/meta/{refprefix}VK_KHR_performance_query.adoc[]
6
7=== Other Extension Metadata
8
9*Last Modified Date*::
10    2019-10-08
11*IP Status*::
12    No known IP claims.
13*Contributors*::
14  - Jesse Barker, Unity Technologies
15  - Kenneth Benzie, Codeplay
16  - Jan-Harald Fredriksen, ARM
17  - Jeff Leger, Qualcomm
18  - Jesse Hall, Google
19  - Tobias Hector, AMD
20  - Neil Henning, Codeplay
21  - Baldur Karlsson
22  - Lionel Landwerlin, Intel
23  - Peter Lohrmann, AMD
24  - Alon Or-bach, Samsung
25  - Daniel Rakos, AMD
26  - Niklas Smedberg, Unity Technologies
27  - Igor Ostrowski, Intel
28
29=== Description
30
31The `VK_KHR_performance_query` extension adds a mechanism to allow querying
32of performance counters for use in applications and by profiling tools.
33
34Each queue family may: expose counters that can: be enabled on a queue of
35that family.
36We extend elink:VkQueryType to add a new query type for performance queries,
37and chain a structure on slink:VkQueryPoolCreateInfo to specify the
38performance queries to enable.
39
40include::{generated}/interfaces/VK_KHR_performance_query.adoc[]
41
42=== Issues
43
441) Should this extension include a mechanism to begin a query in command
45buffer _A_ and end the query in command buffer _B_?
46
47*RESOLVED* No - queries are tied to command buffer creation and thus have to
48be encapsulated within a single command buffer.
49
502) Should this extension include a mechanism to begin and end queries
51globally on the queue, not using the existing command buffer commands?
52
53*RESOLVED* No - for the same reasoning as the resolution of 1).
54
553) Should this extension expose counters that require multiple passes?
56
57*RESOLVED* Yes - users should re-submit a command buffer with the same
58commands in it multiple times, specifying the pass to count as the query
59parameter in slink:VkPerformanceQuerySubmitInfoKHR.
60
614) How to handle counters across parallel workloads?
62
63*RESOLVED* In the spirit of Vulkan, a counter description flag
64ename:VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR
65denotes that the accuracy of a counter result is affected by parallel
66workloads.
67
685) How to handle secondary command buffers?
69
70*RESOLVED* Secondary command buffers inherit any counter pass index
71specified in the parent primary command buffer.
72Note: this is no longer an issue following the resolution of issue 10.
73
746) What commands does the profiling lock have to be held for?
75
76*RESOLVED* For any command buffer that is being queried with a performance
77query pool, the profiling lock must: be held while that command buffer is in
78the _recording_, _executable_, or _pending state_.
79
807) Should we support flink:vkCmdCopyQueryPoolResults?
81
82*RESOLVED* Yes.
83
848) Should we allow performance queries to interact with multiview?
85
86*RESOLVED* Yes, but the performance queries must: be performed once for each
87pass per view.
88
899) Should a `queryCount > 1` be usable for performance queries?
90
91*RESOLVED* Yes.
92Some vendors will have costly performance counter query pool creation, and
93if a certain set of counters is to be used multiple times, they would rather
94that a `queryCount > 1` be used to amortize the instantiation cost.
95
9610) Should we introduce an indirect mechanism to set the counter pass index?
97
98*RESOLVED* Specify the counter pass index at submit time instead, to avoid
99requiring re-recording of command buffers when multiple counter passes are
100needed.
101
102
103=== Examples
104
105The following example shows how to find what performance counters a queue
106family supports, how to set up a query pool to record these performance
107counters, how to add the query pool to the command buffer to record
108information, and how to get the results from the query pool.
109
110[source,c++]
111----
112// A previously created physical device
113VkPhysicalDevice physicalDevice;
114
115// One of the queue families our device supports
116uint32_t queueFamilyIndex;
117
118uint32_t counterCount;
119
120// Get the count of counters supported
121vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
122  physicalDevice,
123  queueFamilyIndex,
124  &counterCount,
125  NULL,
126  NULL);
127
128VkPerformanceCounterKHR* counters =
129  malloc(sizeof(VkPerformanceCounterKHR) * counterCount);
130VkPerformanceCounterDescriptionKHR* counterDescriptions =
131  malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount);
132
133// Get the counters supported
134vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
135  physicalDevice,
136  queueFamilyIndex,
137  &counterCount,
138  counters,
139  counterDescriptions);
140
141// Try to enable the first 8 counters
142uint32_t enabledCounters[8];
143
144const uint32_t enabledCounterCount = min(counterCount, 8);
145
146for (uint32_t i = 0; i < enabledCounterCount; i++) {
147  enabledCounters[i] = i;
148}
149
150// A previously created device that had the performanceCounterQueryPools feature
151// set to VK_TRUE
152VkDevice device;
153
154VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = {
155  .sType = VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
156  .pNext = NULL,
157
158  // Specify the queue family that this performance query is performed on
159  .queueFamilyIndex = queueFamilyIndex,
160
161  // The number of counters to enable
162  .counterIndexCount = enabledCounterCount,
163
164  // The array of indices of counters to enable
165  .pCounterIndices = enabledCounters
166};
167
168
169// Get the number of passes our counters will require.
170uint32_t numPasses;
171
172vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
173  physicalDevice,
174  &performanceQueryCreateInfo,
175  &numPasses);
176
177VkQueryPoolCreateInfo queryPoolCreateInfo = {
178  .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
179  .pNext = &performanceQueryCreateInfo,
180  .flags = 0,
181  // Using our new query type here
182  .queryType = VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,
183  .queryCount = 1,
184  .pipelineStatistics = 0
185};
186
187VkQueryPool queryPool;
188
189VkResult result = vkCreateQueryPool(
190  device,
191  &queryPoolCreateInfo,
192  NULL,
193  &queryPool);
194
195assert(VK_SUCCESS == result);
196
197// A queue from queueFamilyIndex
198VkQueue queue;
199
200// A command buffer we want to record counters on
201VkCommandBuffer commandBuffer;
202
203VkCommandBufferBeginInfo commandBufferBeginInfo = {
204  .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
205  .pNext = NULL,
206  .flags = 0,
207  .pInheritanceInfo = NULL
208};
209
210VkAcquireProfilingLockInfoKHR lockInfo = {
211  .sType = VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
212  .pNext = NULL,
213  .flags = 0,
214  .timeout = UINT64_MAX // Wait forever for the lock
215};
216
217// Acquire the profiling lock before we record command buffers
218// that will use performance queries
219
220result = vkAcquireProfilingLockKHR(device, &lockInfo);
221
222assert(VK_SUCCESS == result);
223
224result = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
225
226assert(VK_SUCCESS == result);
227
228vkCmdResetQueryPool(
229  commandBuffer,
230  queryPool,
231  0,
232  1);
233
234vkCmdBeginQuery(
235  commandBuffer,
236  queryPool,
237  0,
238  0);
239
240// Perform the commands you want to get performance information on
241// ...
242
243// Perform a barrier to ensure all previous commands have completed before
244// ending the query
245vkCmdPipelineBarrier(commandBuffer,
246  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
247  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
248  0,
249  0,
250  NULL,
251  0,
252  NULL,
253  0,
254  NULL);
255
256vkCmdEndQuery(
257  commandBuffer,
258  queryPool,
259  0);
260
261result = vkEndCommandBuffer(commandBuffer);
262
263assert(VK_SUCCESS == result);
264
265for (uint32_t counterPass = 0; counterPass < numPasses; counterPass++) {
266
267  VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
268    VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
269    NULL,
270    counterPass
271  };
272
273
274  // Submit the command buffer and wait for its completion
275  // ...
276}
277
278// Release the profiling lock after the command buffer is no longer in the
279// pending state.
280vkReleaseProfilingLockKHR(device);
281
282result = vkResetCommandBuffer(commandBuffer, 0);
283
284assert(VK_SUCCESS == result);
285
286// Create an array to hold the results of all counters
287VkPerformanceCounterResultKHR* recordedCounters = malloc(
288  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount);
289
290result = vkGetQueryPoolResults(
291  device,
292  queryPool,
293  0,
294  1,
295  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
296  recordedCounters,
297  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
298  0);
299
300// recordedCounters is filled with our counters; we will look at the first one as an example
301switch (counters[0].storage) {
302  case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR:
303    // use recordedCounters[0].int32 to get at the counter result!
304    break;
305  case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR:
306    // use recordedCounters[0].int64 to get at the counter result!
307    break;
308  case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR:
309    // use recordedCounters[0].uint32 to get at the counter result!
310    break;
311  case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR:
312    // use recordedCounters[0].uint64 to get at the counter result!
313    break;
314  case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR:
315    // use recordedCounters[0].float32 to get at the counter result!
316    break;
317  case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR:
318    // use recordedCounters[0].float64 to get at the counter result!
319    break;
320}
321----
322
323=== Version History
324
325  * Revision 1, 2019-10-08
326