1 /*
2 * Copyright 2019 Google LLC
3 * SPDX-License-Identifier: MIT
4 *
5 * based in part on anv and radv which are:
6 * Copyright © 2015 Intel Corporation
7 * Copyright © 2016 Red Hat.
8 * Copyright © 2016 Bas Nieuwenhuizen
9 */
10
11 #include "vn_query_pool.h"
12
13 #include "venus-protocol/vn_protocol_driver_query_pool.h"
14
15 #include "vn_device.h"
16 #include "vn_feedback.h"
17 #include "vn_physical_device.h"
18
19 /* query pool commands */
20
21 VkResult
vn_CreateQueryPool(VkDevice device,const VkQueryPoolCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkQueryPool * pQueryPool)22 vn_CreateQueryPool(VkDevice device,
23 const VkQueryPoolCreateInfo *pCreateInfo,
24 const VkAllocationCallbacks *pAllocator,
25 VkQueryPool *pQueryPool)
26 {
27 struct vn_device *dev = vn_device_from_handle(device);
28 const VkAllocationCallbacks *alloc =
29 pAllocator ? pAllocator : &dev->base.base.alloc;
30
31 struct vn_query_pool *pool =
32 vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN,
33 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
34 if (!pool)
35 return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
36
37 vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base);
38
39 pool->allocator = *alloc;
40
41 switch (pCreateInfo->queryType) {
42 case VK_QUERY_TYPE_OCCLUSION:
43 /*
44 * Occlusion queries write one integer value - the number of samples
45 * passed.
46 */
47 pool->result_array_size = 1;
48 break;
49 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
50 /*
51 * Pipeline statistics queries write one integer value for each bit that
52 * is enabled in the pipelineStatistics when the pool is created, and
53 * the statistics values are written in bit order starting from the
54 * least significant bit.
55 */
56 pool->result_array_size =
57 util_bitcount(pCreateInfo->pipelineStatistics);
58 break;
59 case VK_QUERY_TYPE_TIMESTAMP:
60 /* Timestamp queries write one integer value. */
61 pool->result_array_size = 1;
62 break;
63 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
64 /*
65 * Transform feedback queries write two integers; the first integer is
66 * the number of primitives successfully written to the corresponding
67 * transform feedback buffer and the second is the number of primitives
68 * output to the vertex stream, regardless of whether they were
69 * successfully captured or not.
70 */
71 pool->result_array_size = 2;
72 break;
73 case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
74 /*
75 * Primitives generated queries write one integer value; the number of
76 * primitives output to the vertex stream, regardless of whether
77 * transform feedback is active or not, or whether they were
78 * successfully captured by transform feedback or not. This is identical
79 * to the second integer of the transform feedback queries if transform
80 * feedback is active.
81 */
82 pool->result_array_size = 1;
83 break;
84 default:
85 unreachable("bad query type");
86 break;
87 }
88
89 if (!VN_PERF(NO_QUERY_FEEDBACK)) {
90 /* Feedback results are always 64 bit and include availability bit
91 * (also 64 bit)
92 */
93 const uint32_t slot_size = (pool->result_array_size * 8) + 8;
94 VkResult result = vn_feedback_buffer_create(
95 dev, slot_size * pCreateInfo->queryCount, alloc, &pool->fb_buf);
96 if (result != VK_SUCCESS) {
97 vn_object_base_fini(&pool->base);
98 vk_free(alloc, pool);
99 return vn_error(dev->instance, result);
100 }
101 }
102
103 /* Venus has to handle overflow behavior with query feedback to keep
104 * consistency between vkCmdCopyQueryPoolResults and vkGetQueryPoolResults.
105 * The default query feedback behavior is to wrap on overflow. However, per
106 * spec:
107 *
108 * If an unsigned integer query’s value overflows the result type, the
109 * value may either wrap or saturate.
110 *
111 * We detect the renderer side implementation to align with the
112 * implementation specific behavior.
113 */
114 switch (dev->physical_device->renderer_driver_id) {
115 case VK_DRIVER_ID_ARM_PROPRIETARY:
116 case VK_DRIVER_ID_MESA_LLVMPIPE:
117 case VK_DRIVER_ID_MESA_TURNIP:
118 pool->saturate_on_overflow = true;
119 break;
120 default:
121 break;
122 };
123
124 VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
125 vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL,
126 &pool_handle);
127
128 *pQueryPool = pool_handle;
129
130 return VK_SUCCESS;
131 }
132
133 void
vn_DestroyQueryPool(VkDevice device,VkQueryPool queryPool,const VkAllocationCallbacks * pAllocator)134 vn_DestroyQueryPool(VkDevice device,
135 VkQueryPool queryPool,
136 const VkAllocationCallbacks *pAllocator)
137 {
138 struct vn_device *dev = vn_device_from_handle(device);
139 struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
140 const VkAllocationCallbacks *alloc;
141
142 if (!pool)
143 return;
144
145 alloc = pAllocator ? pAllocator : &pool->allocator;
146
147 if (pool->fb_buf)
148 vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc);
149
150 vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL);
151
152 vn_object_base_fini(&pool->base);
153 vk_free(alloc, pool);
154 }
155
156 void
vn_ResetQueryPool(VkDevice device,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)157 vn_ResetQueryPool(VkDevice device,
158 VkQueryPool queryPool,
159 uint32_t firstQuery,
160 uint32_t queryCount)
161 {
162 struct vn_device *dev = vn_device_from_handle(device);
163 struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
164
165 vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery,
166 queryCount);
167 if (pool->fb_buf) {
168 /* Feedback results are always 64 bit and include availability bit
169 * (also 64 bit)
170 */
171 const uint32_t slot_size = (pool->result_array_size * 8) + 8;
172 const uint32_t offset = slot_size * firstQuery;
173 memset(pool->fb_buf->data + offset, 0, slot_size * queryCount);
174 }
175 }
176
177 static VkResult
vn_get_query_pool_feedback(struct vn_query_pool * pool,uint32_t firstQuery,uint32_t queryCount,void * pData,VkDeviceSize stride,VkQueryResultFlags flags)178 vn_get_query_pool_feedback(struct vn_query_pool *pool,
179 uint32_t firstQuery,
180 uint32_t queryCount,
181 void *pData,
182 VkDeviceSize stride,
183 VkQueryResultFlags flags)
184 {
185 VkResult result = VK_SUCCESS;
186 /* Feedback results are always 64 bit and include availability bit
187 * (also 64 bit)
188 */
189 const uint32_t slot_array_size = pool->result_array_size + 1;
190 uint64_t *src = pool->fb_buf->data;
191 src += slot_array_size * firstQuery;
192
193 uint32_t dst_index = 0;
194 uint32_t src_index = 0;
195 if (flags & VK_QUERY_RESULT_64_BIT) {
196 uint64_t *dst = pData;
197 uint32_t index_stride = stride / sizeof(uint64_t);
198 for (uint32_t i = 0; i < queryCount; i++) {
199 /* Copy the result if its available */
200 const uint64_t avail = src[src_index + pool->result_array_size];
201 if (avail) {
202 memcpy(&dst[dst_index], &src[src_index],
203 pool->result_array_size * sizeof(uint64_t));
204 } else {
205 result = VK_NOT_READY;
206 /* valid to return result of 0 if partial bit is set */
207 if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
208 memset(&dst[dst_index], 0,
209 pool->result_array_size * sizeof(uint64_t));
210 }
211 }
212 /* Set the availability bit if requested */
213 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
214 dst[dst_index + pool->result_array_size] = avail;
215
216 dst_index += index_stride;
217 src_index += slot_array_size;
218 }
219 } else {
220 uint32_t *dst = pData;
221 uint32_t index_stride = stride / sizeof(uint32_t);
222 for (uint32_t i = 0; i < queryCount; i++) {
223 /* Copy the result if its available, converting down to uint32_t */
224 const uint32_t avail =
225 (uint32_t)src[src_index + pool->result_array_size];
226 if (avail) {
227 for (uint32_t j = 0; j < pool->result_array_size; j++) {
228 const uint64_t src_val = src[src_index + j];
229 dst[dst_index + j] =
230 src_val > UINT32_MAX && pool->saturate_on_overflow
231 ? UINT32_MAX
232 : (uint32_t)src_val;
233 }
234 } else {
235 result = VK_NOT_READY;
236 /* valid to return result of 0 if partial bit is set */
237 if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
238 for (uint32_t j = 0; j < pool->result_array_size; j++)
239 dst[dst_index + j] = 0;
240 }
241 }
242 /* Set the availability bit if requested */
243 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
244 dst[dst_index + pool->result_array_size] = avail;
245
246 dst_index += index_stride;
247 src_index += slot_array_size;
248 }
249 }
250 return result;
251 }
252
253 static VkResult
vn_query_feedback_wait_ready(struct vn_query_pool * pool,uint32_t firstQuery,uint32_t queryCount)254 vn_query_feedback_wait_ready(struct vn_query_pool *pool,
255 uint32_t firstQuery,
256 uint32_t queryCount)
257 {
258 /* Timeout after 5 seconds */
259 uint64_t timeout = 5000ull * 1000 * 1000;
260 uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout);
261
262 /* Feedback results are always 64 bit and include availability bit
263 * (also 64 bit)
264 */
265 const uint32_t slot_array_size = pool->result_array_size + 1;
266 volatile uint64_t *src = pool->fb_buf->data;
267 src += (slot_array_size * firstQuery) + pool->result_array_size;
268
269 uint32_t src_index = 0;
270 for (uint32_t i = 0; i < queryCount; i++) {
271 while (!src[src_index]) {
272 if (os_time_get_nano() > abs_timeout_ns)
273 return VK_ERROR_DEVICE_LOST;
274
275 thrd_yield();
276 }
277 src_index += slot_array_size;
278 }
279 return VK_SUCCESS;
280 }
281
VkResult
vn_GetQueryPoolResults(VkDevice device,
                       VkQueryPool queryPool,
                       uint32_t firstQuery,
                       uint32_t queryCount,
                       size_t dataSize,
                       void *pData,
                       VkDeviceSize stride,
                       VkQueryResultFlags flags)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc = &pool->allocator;
   VkResult result;

   /* bytes per result integer and per query's result array */
   const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
   const size_t result_size = pool->result_array_size * result_width;
   /* with WAIT or PARTIAL, a result value is written for every query, so no
    * per-query availability check is needed when scattering results below
    */
   const bool result_always_written =
      flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);

   /* Fast path: read results directly from the query feedback buffer,
    * avoiding a synchronous call into the renderer.
    */
   if (pool->fb_buf) {
      /* If wait bit is set, poll until all requested queries are available
       * (bounded by the internal timeout in vn_query_feedback_wait_ready).
       */
      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
         result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount);
         if (result != VK_SUCCESS)
            return vn_result(dev->instance, result);
      }
      result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
                                          stride, flags);
      return vn_result(dev->instance, result);
   }

   /* Slow path: query the renderer. Request tightly-packed results (with
    * availability appended when unavailable queries must be distinguished),
    * then scatter them into pData at the caller's stride.
    */
   VkQueryResultFlags packed_flags = flags;
   size_t packed_stride = result_size;
   if (!result_always_written)
      packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
   if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      packed_stride += result_width;

   const size_t packed_size = packed_stride * queryCount;
   void *packed_data;
   if (result_always_written && packed_stride == stride) {
      /* packed layout matches the caller's layout; write in place */
      packed_data = pData;
   } else {
      packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!packed_data)
         return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   result = vn_call_vkGetQueryPoolResults(
      dev->primary_ring, device, queryPool, firstQuery, queryCount,
      packed_size, packed_data, packed_stride, packed_flags);

   if (packed_data == pData)
      return vn_result(dev->instance, result);

   /* bytes to copy per query: the result values, plus the availability value
    * only when the caller asked for it
    */
   const size_t copy_size =
      result_size +
      (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0);
   const void *src = packed_data;
   void *dst = pData;
   if (result == VK_SUCCESS) {
      /* everything available: straight strided copy */
      for (uint32_t i = 0; i < queryCount; i++) {
         memcpy(dst, src, copy_size);
         src += packed_stride;
         dst += stride;
      }
   } else if (result == VK_NOT_READY) {
      /* we forced WITH_AVAILABILITY above, so each packed entry carries an
       * availability value right after its results
       */
      assert(!result_always_written &&
             (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
      if (flags & VK_QUERY_RESULT_64_BIT) {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint64_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               /* unavailable: report only availability = 0 */
               *(uint64_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      } else {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint32_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               /* unavailable: report only availability = 0 */
               *(uint32_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      }
   }

   vk_free(alloc, packed_data);
   return vn_result(dev->instance, result);
}
383