• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2019 Google LLC
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv and radv which are:
 * Copyright © 2015 Intel Corporation
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 */
10 
11 #include "vn_query_pool.h"
12 
13 #include "venus-protocol/vn_protocol_driver_query_pool.h"
14 
15 #include "vn_device.h"
16 #include "vn_feedback.h"
17 #include "vn_physical_device.h"
18 
19 /* query pool commands */
20 
/* Create a query pool.
 *
 * The renderer-side pool is created asynchronously on the primary ring
 * (creation failures on the renderer side are not observed here).  Unless
 * disabled via VN_PERF(NO_QUERY_FEEDBACK), a CPU-visible feedback buffer is
 * also created so results can be read back without a renderer round trip.
 */
VkResult
vn_CreateQueryPool(VkDevice device,
                   const VkQueryPoolCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkQueryPool *pQueryPool)
{
   struct vn_device *dev = vn_device_from_handle(device);
   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &dev->base.base.alloc;

   struct vn_query_pool *pool =
      vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pool)
      return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base);

   /* Remember the effective allocator so vn_DestroyQueryPool can free with
    * it when the caller passes no allocator of its own.
    */
   pool->allocator = *alloc;

   /* result_array_size is the number of integers a single query writes,
    * per the Vulkan spec for each query type.
    */
   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
      /*
       * Occlusion queries write one integer value - the number of samples
       * passed.
       */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      /*
       * Pipeline statistics queries write one integer value for each bit that
       * is enabled in the pipelineStatistics when the pool is created, and
       * the statistics values are written in bit order starting from the
       * least significant bit.
       */
      pool->result_array_size =
         util_bitcount(pCreateInfo->pipelineStatistics);
      break;
   case VK_QUERY_TYPE_TIMESTAMP:
      /*  Timestamp queries write one integer value. */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      /*
       * Transform feedback queries write two integers; the first integer is
       * the number of primitives successfully written to the corresponding
       * transform feedback buffer and the second is the number of primitives
       * output to the vertex stream, regardless of whether they were
       * successfully captured or not.
       */
      pool->result_array_size = 2;
      break;
   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
      /*
       * Primitives generated queries write one integer value; the number of
       * primitives output to the vertex stream, regardless of whether
       * transform feedback is active or not, or whether they were
       * successfully captured by transform feedback or not. This is identical
       * to the second integer of the transform feedback queries if transform
       * feedback is active.
       */
      pool->result_array_size = 1;
      break;
   default:
      unreachable("bad query type");
      break;
   }

   if (!VN_PERF(NO_QUERY_FEEDBACK)) {
      /* Feedback results are always 64 bit and include availability bit
       * (also 64 bit)
       */
      const uint32_t slot_size = (pool->result_array_size * 8) + 8;
      /* NOTE(review): slot_size * queryCount is a 32-bit multiply with no
       * overflow check; presumably queryCount is bounded in practice —
       * TODO confirm.
       */
      VkResult result = vn_feedback_buffer_create(
         dev, slot_size * pCreateInfo->queryCount, alloc, &pool->fb_buf);
      if (result != VK_SUCCESS) {
         vn_object_base_fini(&pool->base);
         vk_free(alloc, pool);
         return vn_error(dev->instance, result);
      }
   }

   /* Venus has to handle overflow behavior with query feedback to keep
    * consistency between vkCmdCopyQueryPoolResults and vkGetQueryPoolResults.
    * The default query feedback behavior is to wrap on overflow. However, per
    * spec:
    *
    * If an unsigned integer query’s value overflows the result type, the
    * value may either wrap or saturate.
    *
    * We detect the renderer side implementation to align with the
    * implementation specific behavior.
    */
   switch (dev->physical_device->renderer_driver_id) {
   case VK_DRIVER_ID_ARM_PROPRIETARY:
   case VK_DRIVER_ID_MESA_LLVMPIPE:
   case VK_DRIVER_ID_MESA_TURNIP:
      pool->saturate_on_overflow = true;
      break;
   default:
      break;
   };

   /* Renderer allocates with its own allocator, hence NULL is passed. */
   VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
   vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL,
                              &pool_handle);

   *pQueryPool = pool_handle;

   return VK_SUCCESS;
}
132 
133 void
vn_DestroyQueryPool(VkDevice device,VkQueryPool queryPool,const VkAllocationCallbacks * pAllocator)134 vn_DestroyQueryPool(VkDevice device,
135                     VkQueryPool queryPool,
136                     const VkAllocationCallbacks *pAllocator)
137 {
138    struct vn_device *dev = vn_device_from_handle(device);
139    struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
140    const VkAllocationCallbacks *alloc;
141 
142    if (!pool)
143       return;
144 
145    alloc = pAllocator ? pAllocator : &pool->allocator;
146 
147    if (pool->fb_buf)
148       vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc);
149 
150    vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL);
151 
152    vn_object_base_fini(&pool->base);
153    vk_free(alloc, pool);
154 }
155 
156 void
vn_ResetQueryPool(VkDevice device,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)157 vn_ResetQueryPool(VkDevice device,
158                   VkQueryPool queryPool,
159                   uint32_t firstQuery,
160                   uint32_t queryCount)
161 {
162    struct vn_device *dev = vn_device_from_handle(device);
163    struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
164 
165    vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery,
166                              queryCount);
167    if (pool->fb_buf) {
168       /* Feedback results are always 64 bit and include availability bit
169        * (also 64 bit)
170        */
171       const uint32_t slot_size = (pool->result_array_size * 8) + 8;
172       const uint32_t offset = slot_size * firstQuery;
173       memset(pool->fb_buf->data + offset, 0, slot_size * queryCount);
174    }
175 }
176 
/* Read back query results from the feedback buffer into pData, following
 * vkGetQueryPoolResults layout rules (per-query stride, 32- vs 64-bit
 * results, optional trailing availability value).
 *
 * Returns VK_NOT_READY if any requested query is not yet available,
 * VK_SUCCESS otherwise.  When VK_QUERY_RESULT_PARTIAL_BIT is set,
 * unavailable results are written as 0 (a valid partial value).
 */
static VkResult
vn_get_query_pool_feedback(struct vn_query_pool *pool,
                           uint32_t firstQuery,
                           uint32_t queryCount,
                           void *pData,
                           VkDeviceSize stride,
                           VkQueryResultFlags flags)
{
   VkResult result = VK_SUCCESS;
   /* Feedback results are always 64 bit and include availability bit
    * (also 64 bit)
    */
   const uint32_t slot_array_size = pool->result_array_size + 1;
   uint64_t *src = pool->fb_buf->data;
   src += slot_array_size * firstQuery;

   /* dst_index/src_index advance in units of elements, not bytes. */
   uint32_t dst_index = 0;
   uint32_t src_index = 0;
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *dst = pData;
      uint32_t index_stride = stride / sizeof(uint64_t);
      for (uint32_t i = 0; i < queryCount; i++) {
         /* Copy the result if it's available */
         const uint64_t avail = src[src_index + pool->result_array_size];
         if (avail) {
            memcpy(&dst[dst_index], &src[src_index],
                   pool->result_array_size * sizeof(uint64_t));
         } else {
            result = VK_NOT_READY;
            /* valid to return result of 0 if partial bit is set */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               memset(&dst[dst_index], 0,
                      pool->result_array_size * sizeof(uint64_t));
            }
         }
         /* Set the availability bit if requested */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[dst_index + pool->result_array_size] = avail;

         dst_index += index_stride;
         src_index += slot_array_size;
      }
   } else {
      uint32_t *dst = pData;
      uint32_t index_stride = stride / sizeof(uint32_t);
      for (uint32_t i = 0; i < queryCount; i++) {
         /* Copy the result if it's available, converting down to uint32_t */
         const uint32_t avail =
            (uint32_t)src[src_index + pool->result_array_size];
         if (avail) {
            for (uint32_t j = 0; j < pool->result_array_size; j++) {
               const uint64_t src_val = src[src_index + j];
               /* Per spec an overflowing unsigned query value may wrap or
                * saturate; saturate_on_overflow matches the renderer
                * driver's choice (see vn_CreateQueryPool), otherwise the
                * truncating cast wraps.
                */
               dst[dst_index + j] =
                  src_val > UINT32_MAX && pool->saturate_on_overflow
                     ? UINT32_MAX
                     : (uint32_t)src_val;
            }
         } else {
            result = VK_NOT_READY;
            /* valid to return result of 0 if partial bit is set */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               for (uint32_t j = 0; j < pool->result_array_size; j++)
                  dst[dst_index + j] = 0;
            }
         }
         /* Set the availability bit if requested */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[dst_index + pool->result_array_size] = avail;

         dst_index += index_stride;
         src_index += slot_array_size;
      }
   }
   return result;
}
252 
253 static VkResult
vn_query_feedback_wait_ready(struct vn_query_pool * pool,uint32_t firstQuery,uint32_t queryCount)254 vn_query_feedback_wait_ready(struct vn_query_pool *pool,
255                              uint32_t firstQuery,
256                              uint32_t queryCount)
257 {
258    /* Timeout after 5 seconds */
259    uint64_t timeout = 5000ull * 1000 * 1000;
260    uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout);
261 
262    /* Feedback results are always 64 bit and include availability bit
263     * (also 64 bit)
264     */
265    const uint32_t slot_array_size = pool->result_array_size + 1;
266    volatile uint64_t *src = pool->fb_buf->data;
267    src += (slot_array_size * firstQuery) + pool->result_array_size;
268 
269    uint32_t src_index = 0;
270    for (uint32_t i = 0; i < queryCount; i++) {
271       while (!src[src_index]) {
272          if (os_time_get_nano() > abs_timeout_ns)
273             return VK_ERROR_DEVICE_LOST;
274 
275          thrd_yield();
276       }
277       src_index += slot_array_size;
278    }
279    return VK_SUCCESS;
280 }
281 
/* Implement vkGetQueryPoolResults.
 *
 * Fast path: when a feedback buffer exists, results are read directly from
 * it (optionally waiting for availability first) with no renderer call.
 *
 * Fallback: results are fetched from the renderer into a tightly "packed"
 * buffer.  The availability value is force-requested whenever results are
 * not guaranteed to be written (no WAIT/PARTIAL bit), so that VK_NOT_READY
 * slots can be identified and the caller's buffer filled per spec.  The
 * packed layout is then re-strided into pData.
 *
 * NOTE(review): dataSize is not validated against queryCount/stride here;
 * presumably the renderer/valid-usage rules cover it — TODO confirm.
 */
VkResult
vn_GetQueryPoolResults(VkDevice device,
                       VkQueryPool queryPool,
                       uint32_t firstQuery,
                       uint32_t queryCount,
                       size_t dataSize,
                       void *pData,
                       VkDeviceSize stride,
                       VkQueryResultFlags flags)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc = &pool->allocator;
   VkResult result;

   /* Per-query element width and total result payload size in bytes. */
   const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
   const size_t result_size = pool->result_array_size * result_width;
   /* With WAIT or PARTIAL, every query slot in pData gets written. */
   const bool result_always_written =
      flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);

   /* Get results from feedback buffers
    * Not possible for VK_QUERY_RESULT_PARTIAL_BIT
    */
   if (pool->fb_buf) {
      /* If wait bit is set, wait poll until query is ready */
      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
         result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount);
         if (result != VK_SUCCESS)
            return vn_result(dev->instance, result);
      }
      result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
                                          stride, flags);
      return vn_result(dev->instance, result);
   }

   /* Force availability into the packed fetch when results may be skipped,
    * so unavailable queries can be detected below.
    */
   VkQueryResultFlags packed_flags = flags;
   size_t packed_stride = result_size;
   if (!result_always_written)
      packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
   if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      packed_stride += result_width;

   const size_t packed_size = packed_stride * queryCount;
   void *packed_data;
   if (result_always_written && packed_stride == stride) {
      /* Layouts match exactly: fetch straight into the caller's buffer. */
      packed_data = pData;
   } else {
      packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!packed_data)
         return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   result = vn_call_vkGetQueryPoolResults(
      dev->primary_ring, device, queryPool, firstQuery, queryCount,
      packed_size, packed_data, packed_stride, packed_flags);

   if (packed_data == pData)
      return vn_result(dev->instance, result);

   /* Bytes to copy per query: results, plus availability only if the
    * caller actually asked for it.
    */
   const size_t copy_size =
      result_size +
      (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0);
   /* void-pointer arithmetic below is a GNU C extension (byte-wise). */
   const void *src = packed_data;
   void *dst = pData;
   if (result == VK_SUCCESS) {
      for (uint32_t i = 0; i < queryCount; i++) {
         memcpy(dst, src, copy_size);
         src += packed_stride;
         dst += stride;
      }
   } else if (result == VK_NOT_READY) {
      /* NOT_READY implies availability was force-packed above; use it to
       * decide per query whether to copy or report unavailable.
       */
      assert(!result_always_written &&
             (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
      if (flags & VK_QUERY_RESULT_64_BIT) {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint64_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint64_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      } else {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint32_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint32_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      }
   }

   vk_free(alloc, packed_data);
   return vn_result(dev->instance, result);
}
383