• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2016-2021 The Khronos Group, Inc.
2//
3// SPDX-License-Identifier: CC-BY-4.0
4
5include::{generated}/meta/{refprefix}VK_KHR_performance_query.txt[]
6
7=== Other Extension Metadata
8
9*Last Modified Date*::
10    2019-10-08
11*IP Status*::
12    No known IP claims.
13*Contributors*::
14  - Jesse Barker, Unity Technologies
15  - Kenneth Benzie, Codeplay
16  - Jan-Harald Fredriksen, ARM
17  - Jeff Leger, Qualcomm
18  - Jesse Hall, Google
19  - Tobias Hector, AMD
20  - Neil Henning, Codeplay
21  - Baldur Karlsson
22  - Lionel Landwerlin, Intel
23  - Peter Lohrmann, AMD
24  - Alon Or-bach, Samsung
25  - Daniel Rakos, AMD
26  - Niklas Smedberg, Unity Technologies
27  - Igor Ostrowski, Intel
28
29=== Description
30
31The `VK_KHR_performance_query` extension adds a mechanism to allow querying
32of performance counters for use in applications and by profiling tools.
33
34Each queue family may: expose counters that can: be enabled on a queue of
35that family.
36We extend elink:VkQueryType to add a new query type for performance queries,
37and chain a structure on slink:VkQueryPoolCreateInfo to specify the
38performance queries to enable.
39
40include::{generated}/interfaces/VK_KHR_performance_query.txt[]
41
42=== Issues
43
441) Should this extension include a mechanism to begin a query in command
45buffer _A_ and end the query in command buffer _B_?
46
47*RESOLVED* No - queries are tied to command buffer creation and thus have to
48be encapsulated within a single command buffer.
49
502) Should this extension include a mechanism to begin and end queries
51globally on the queue, not using the existing command buffer commands?
52
53*RESOLVED* No - for the same reasoning as the resolution of 1).
54
553) Should this extension expose counters that require multiple passes?
56
57*RESOLVED* Yes - users should re-submit a command buffer with the same
58commands in it multiple times, specifying the pass to count as the query
59parameter in slink:VkPerformanceQuerySubmitInfoKHR.
60
614) How to handle counters across parallel workloads?
62
63*RESOLVED* In the spirit of Vulkan, a counter description flag
64ename:VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR
65denotes that the accuracy of a counter result is affected by parallel
66workloads.
67
685) How to handle secondary command buffers?
69
70*RESOLVED* Secondary command buffers inherit any counter pass index
71specified in the parent primary command buffer.
72Note: this is no longer an issue following the resolution of issue 10.
73
746) What commands does the profiling lock have to be held for?
75
76*RESOLVED* For any command buffer that is being queried with a performance
77query pool, the profiling lock must: be held while that command buffer is in
78the _recording_, _executable_, or _pending_ state.
79
807) Should we support flink:vkCmdCopyQueryPoolResults?
81
82*RESOLVED* Yes.
83
848) Should we allow performance queries to interact with multiview?
85
86*RESOLVED* Yes, but the performance queries must be performed once for each
87pass per view.
88
899) Should a `queryCount > 1` be usable for performance queries?
90
91*RESOLVED* Yes.
92Some vendors will have costly performance counter query pool creation, and
93would prefer that, if a certain set of counters is to be used multiple times,
94a `queryCount > 1` can be used to amortize the instantiation cost.
95
9610) Should we introduce an indirect mechanism to set the counter pass index?
97
98*RESOLVED* Specify the counter pass index at submit time instead, to avoid
99requiring re-recording of command buffers when multiple counter passes are
100needed.
101
102
103=== Examples
104
105The following example shows how to find what performance counters a queue
106family supports, how to set up a query pool to record these performance
107counters, how to add the query pool to the command buffer to record
108information, and how to get the results from the query pool.
109
110[source,c++]
111--------------------------------------
112// A previously created physical device
113VkPhysicalDevice physicalDevice;
114
115// One of the queue families our device supports
116uint32_t queueFamilyIndex;
117
118uint32_t counterCount;
119
120// Get the count of counters supported
121vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
122  physicalDevice,
123  queueFamilyIndex,
124  &counterCount,
125  NULL,
126  NULL);
127
128VkPerformanceCounterKHR* counters =
129  malloc(sizeof(VkPerformanceCounterKHR) * counterCount);
130VkPerformanceCounterDescriptionKHR* counterDescriptions =
131  malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount);
132
133// Get the counters supported
134vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
135  physicalDevice,
136  queueFamilyIndex,
137  &counterCount,
138  counters,
139  counterDescriptions);
140
141// Try to enable the first 8 counters
142uint32_t enabledCounters[8];
143
144const uint32_t enabledCounterCount = min(counterCount, 8);
145
146for (uint32_t i = 0; i < enabledCounterCount; i++) {
147  enabledCounters[i] = i;
148}
149
150// A previously created device that had the performanceCounterQueryPools feature
151// set to VK_TRUE
152VkDevice device;
153
154VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = {
155  VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
156  NULL,
157
158  // Specify the queue family that this performance query is performed on
159  queueFamilyIndex,
160
161  // The number of counters to enable
162  enabledCounterCount,
163
164  // The array of indices of counters to enable
165  enabledCounters
166};
167
168
169// Get the number of passes our counters will require.
170uint32_t numPasses;
171
172vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
173  physicalDevice,
174  &performanceQueryCreateInfo,
175  &numPasses);
176
177VkQueryPoolCreateInfo queryPoolCreateInfo = {
178  VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
179  &performanceQueryCreateInfo,
180  0,
181
182  // Using our new query type here
183  VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,
184
185  1,
186
187  0
188};
189
190VkQueryPool queryPool;
191
192VkResult result = vkCreateQueryPool(
193  device,
194  &queryPoolCreateInfo,
195  NULL,
196  &queryPool);
197
198assert(VK_SUCCESS == result);
199
200// A queue from queueFamilyIndex
201VkQueue queue;
202
203// A command buffer we want to record counters on
204VkCommandBuffer commandBuffer;
205
206VkCommandBufferBeginInfo commandBufferBeginInfo = {
207  VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
208  NULL,
209  0,
210  NULL
211};
212
213VkAcquireProfilingLockInfoKHR lockInfo = {
214  VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
215  NULL,
216  0,
217  UINT64_MAX // Wait forever for the lock
218};
219
220// Acquire the profiling lock before we record command buffers
221// that will use performance queries
222
223result = vkAcquireProfilingLockKHR(device, &lockInfo);
224
225assert(VK_SUCCESS == result);
226
227result = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
228
229assert(VK_SUCCESS == result);
230
231vkCmdResetQueryPool(
232  commandBuffer,
233  queryPool,
234  0,
235  1);
236
237vkCmdBeginQuery(
238  commandBuffer,
239  queryPool,
240  0,
241  0);
242
243// Perform the commands you want to get performance information on
244// ...
245
246// Perform a barrier to ensure all previous commands have completed before
247// ending the query
248vkCmdPipelineBarrier(commandBuffer,
249  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
250  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
251  0,
252  0,
253  NULL,
254  0,
255  NULL,
256  0,
257  NULL);
258
259vkCmdEndQuery(
260  commandBuffer,
261  queryPool,
262  0);
263
264result = vkEndCommandBuffer(commandBuffer);
265
266assert(VK_SUCCESS == result);
267
268for (uint32_t counterPass = 0; counterPass < numPasses; counterPass++) {
269
270  VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
271    VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
272    NULL,
273    counterPass
274  };
275
276
277  // Submit the command buffer and wait for its completion
278  // ...
279}
280
281// Release the profiling lock after the command buffer is no longer in the
282// pending state.
283vkReleaseProfilingLockKHR(device);
284
285result = vkResetCommandBuffer(commandBuffer, 0);
286
287assert(VK_SUCCESS == result);
288
289// Create an array to hold the results of all counters
290VkPerformanceCounterResultKHR* recordedCounters = malloc(
291  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount);
292
293result = vkGetQueryPoolResults(
294  device,
295  queryPool,
296  0,
297  1,
298  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
299  recordedCounters,
300  sizeof(VkPerformanceCounterResultKHR),
301  0);
302
303// recordedCounters is filled with our counters; we will look at one as an example
304switch (counters[0].storage) {
305  case VK_PERFORMANCE_COUNTER_STORAGE_INT32:
306    // use recordedCounters[0].int32 to get at the counter result!
307    break;
308  case VK_PERFORMANCE_COUNTER_STORAGE_INT64:
309    // use recordedCounters[0].int64 to get at the counter result!
310    break;
311  case VK_PERFORMANCE_COUNTER_STORAGE_UINT32:
312    // use recordedCounters[0].uint32 to get at the counter result!
313    break;
314  case VK_PERFORMANCE_COUNTER_STORAGE_UINT64:
315    // use recordedCounters[0].uint64 to get at the counter result!
316    break;
317  case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32:
318    // use recordedCounters[0].float32 to get at the counter result!
319    break;
320  case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64:
321    // use recordedCounters[0].float64 to get at the counter result!
322    break;
323}
324--------------------------------------
325
326=== Version History
327
328 * Revision 1, 2019-10-08
329