• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 #include "dzn_abi_helper.h"
26 
27 #include "vk_alloc.h"
28 #include "vk_debug_report.h"
29 #include "vk_util.h"
30 
31 #include "os_time.h"
32 
33 static D3D12_QUERY_HEAP_TYPE
dzn_query_pool_get_heap_type(VkQueryType in)34 dzn_query_pool_get_heap_type(VkQueryType in)
35 {
36    switch (in) {
37    case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
38    case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
39    case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
40    default: unreachable("Unsupported query type");
41    }
42 }
43 
44 D3D12_QUERY_TYPE
dzn_query_pool_get_query_type(const struct dzn_query_pool * qpool,VkQueryControlFlags flags)45 dzn_query_pool_get_query_type(const struct dzn_query_pool *qpool,
46                               VkQueryControlFlags flags)
47 {
48    switch (qpool->heap_type) {
49    case D3D12_QUERY_HEAP_TYPE_OCCLUSION:
50       return flags & VK_QUERY_CONTROL_PRECISE_BIT ?
51              D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION;
52    case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
53    case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP;
54    default: unreachable("Unsupported query type");
55    }
56 }
57 
58 static void
dzn_query_pool_destroy(struct dzn_query_pool * qpool,const VkAllocationCallbacks * alloc)59 dzn_query_pool_destroy(struct dzn_query_pool *qpool,
60                        const VkAllocationCallbacks *alloc)
61 {
62    if (!qpool)
63       return;
64 
65    struct dzn_device *device = container_of(qpool->base.device, struct dzn_device, vk);
66 
67    if (qpool->collect_map)
68       ID3D12Resource_Unmap(qpool->collect_buffer, 0, NULL);
69 
70    if (qpool->collect_buffer)
71       ID3D12Resource_Release(qpool->collect_buffer);
72 
73    if (qpool->resolve_buffer)
74       ID3D12Resource_Release(qpool->resolve_buffer);
75 
76    if (qpool->heap)
77       ID3D12QueryHeap_Release(qpool->heap);
78 
79    for (uint32_t q = 0; q < qpool->query_count; q++) {
80       if (qpool->queries[q].fence)
81          ID3D12Fence_Release(qpool->queries[q].fence);
82    }
83 
84    mtx_destroy(&qpool->queries_lock);
85    vk_object_base_finish(&qpool->base);
86    vk_free2(&device->vk.alloc, alloc, qpool);
87 }
88 
89 static VkResult
dzn_query_pool_create(struct dzn_device * device,const VkQueryPoolCreateInfo * info,const VkAllocationCallbacks * alloc,VkQueryPool * out)90 dzn_query_pool_create(struct dzn_device *device,
91                       const VkQueryPoolCreateInfo *info,
92                       const VkAllocationCallbacks *alloc,
93                       VkQueryPool *out)
94 {
95    VK_MULTIALLOC(ma);
96    VK_MULTIALLOC_DECL(&ma, struct dzn_query_pool, qpool, 1);
97    VK_MULTIALLOC_DECL(&ma, struct dzn_query, queries, info->queryCount);
98 
99    if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc,
100                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
101       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
102 
103    vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL);
104 
105    mtx_init(&qpool->queries_lock, mtx_plain);
106    qpool->query_count = info->queryCount;
107    qpool->queries = queries;
108 
109    D3D12_QUERY_HEAP_DESC desc = { 0 };
110    qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType);
111    desc.Count = info->queryCount;
112    desc.NodeMask = 0;
113 
114    HRESULT hres =
115       ID3D12Device1_CreateQueryHeap(device->dev, &desc,
116                                     &IID_ID3D12QueryHeap,
117                                     (void **)&qpool->heap);
118    if (FAILED(hres)) {
119       dzn_query_pool_destroy(qpool, alloc);
120       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
121    }
122 
123    switch (info->queryType) {
124    case VK_QUERY_TYPE_OCCLUSION:
125    case VK_QUERY_TYPE_TIMESTAMP:
126       qpool->query_size = sizeof(uint64_t);
127       break;
128    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
129       qpool->pipeline_statistics = info->pipelineStatistics;
130       qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
131       break;
132    default: unreachable("Unsupported query type");
133    }
134 
135    D3D12_HEAP_PROPERTIES hprops =
136       dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, D3D12_HEAP_TYPE_DEFAULT);
137    D3D12_RESOURCE_DESC rdesc = {
138       .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
139       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
140       .Width = info->queryCount * qpool->query_size,
141       .Height = 1,
142       .DepthOrArraySize = 1,
143       .MipLevels = 1,
144       .Format = DXGI_FORMAT_UNKNOWN,
145       .SampleDesc = { .Count = 1, .Quality = 0 },
146       .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
147       .Flags = D3D12_RESOURCE_FLAG_NONE,
148    };
149 
150    hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
151                                                 D3D12_HEAP_FLAG_NONE,
152                                                 &rdesc,
153                                                 D3D12_RESOURCE_STATE_COPY_DEST,
154                                                 NULL,
155                                                 &IID_ID3D12Resource,
156                                                 (void **)&qpool->resolve_buffer);
157    if (FAILED(hres)) {
158       dzn_query_pool_destroy(qpool, alloc);
159       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
160    }
161 
162    hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0,
163                                                       D3D12_HEAP_TYPE_READBACK);
164    rdesc.Width = info->queryCount * (qpool->query_size + sizeof(uint64_t));
165    hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
166                                                 D3D12_HEAP_FLAG_NONE,
167                                                 &rdesc,
168                                                 D3D12_RESOURCE_STATE_COPY_DEST,
169                                                 NULL,
170                                                 &IID_ID3D12Resource,
171                                                 (void **)&qpool->collect_buffer);
172    if (FAILED(hres)) {
173       dzn_query_pool_destroy(qpool, alloc);
174       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
175    }
176 
177    hres = ID3D12Resource_Map(qpool->collect_buffer, 0, NULL, (void **)&qpool->collect_map);
178    if (FAILED(hres)) {
179       dzn_query_pool_destroy(qpool, alloc);
180       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
181    }
182 
183    memset(qpool->collect_map, 0, rdesc.Width);
184 
185    *out = dzn_query_pool_to_handle(qpool);
186    return VK_SUCCESS;
187 }
188 
189 uint32_t
dzn_query_pool_get_result_offset(const struct dzn_query_pool * qpool,uint32_t query)190 dzn_query_pool_get_result_offset(const struct dzn_query_pool *qpool, uint32_t query)
191 {
192    return query * qpool->query_size;
193 }
194 
195 uint32_t
dzn_query_pool_get_result_size(const struct dzn_query_pool * qpool,uint32_t query_count)196 dzn_query_pool_get_result_size(const struct dzn_query_pool *qpool, uint32_t query_count)
197 {
198    return query_count * qpool->query_size;
199 }
200 
201 uint32_t
dzn_query_pool_get_availability_offset(const struct dzn_query_pool * qpool,uint32_t query)202 dzn_query_pool_get_availability_offset(const struct dzn_query_pool *qpool, uint32_t query)
203 {
204    return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query);
205 }
206 
207 VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateQueryPool(VkDevice device,const VkQueryPoolCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkQueryPool * pQueryPool)208 dzn_CreateQueryPool(VkDevice device,
209                     const VkQueryPoolCreateInfo *pCreateInfo,
210                     const VkAllocationCallbacks *pAllocator,
211                     VkQueryPool *pQueryPool)
212 {
213    return dzn_query_pool_create(dzn_device_from_handle(device),
214                                 pCreateInfo, pAllocator, pQueryPool);
215 }
216 
217 VKAPI_ATTR void VKAPI_CALL
dzn_DestroyQueryPool(VkDevice device,VkQueryPool queryPool,const VkAllocationCallbacks * pAllocator)218 dzn_DestroyQueryPool(VkDevice device,
219                      VkQueryPool queryPool,
220                      const VkAllocationCallbacks *pAllocator)
221 {
222    dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator);
223 }
224 
225 VKAPI_ATTR void VKAPI_CALL
dzn_ResetQueryPool(VkDevice device,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)226 dzn_ResetQueryPool(VkDevice device,
227                    VkQueryPool queryPool,
228                    uint32_t firstQuery,
229                    uint32_t queryCount)
230 {
231    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
232 
233    mtx_lock(&qpool->queries_lock);
234    for (uint32_t q = 0; q < queryCount; q++) {
235       struct dzn_query *query = &qpool->queries[firstQuery + q];
236 
237       query->fence_value = 0;
238       if (query->fence) {
239          ID3D12Fence_Release(query->fence);
240          query->fence = NULL;
241       }
242    }
243    mtx_lock(&qpool->queries_lock);
244 
245    memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery),
246           0, queryCount * qpool->query_size);
247    memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery),
248           0, queryCount * sizeof(uint64_t));
249 }
250 
251 VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetQueryPoolResults(VkDevice device,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount,size_t dataSize,void * pData,VkDeviceSize stride,VkQueryResultFlags flags)252 dzn_GetQueryPoolResults(VkDevice device,
253                         VkQueryPool queryPool,
254                         uint32_t firstQuery,
255                         uint32_t queryCount,
256                         size_t dataSize,
257                         void *pData,
258                         VkDeviceSize stride,
259                         VkQueryResultFlags flags)
260 {
261    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
262 
263    uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ?
264                    sizeof(uint64_t) : sizeof(uint32_t);
265    VkResult result = VK_SUCCESS;
266 
267    for (uint32_t q = 0; q < queryCount; q++) {
268       struct dzn_query *query = &qpool->queries[q + firstQuery];
269 
270       uint8_t *dst_ptr = (uint8_t *)pData + (stride * q);
271       uint8_t *src_ptr =
272          (uint8_t *)qpool->collect_map +
273          dzn_query_pool_get_result_offset(qpool, firstQuery + q);
274       uint64_t available = 0;
275 
276       if (flags & VK_QUERY_RESULT_WAIT_BIT) {
277          ID3D12Fence *query_fence = NULL;
278          uint64_t query_fence_val = 0;
279 
280          while (true) {
281             mtx_lock(&qpool->queries_lock);
282             if (query->fence) {
283                query_fence = query->fence;
284                ID3D12Fence_AddRef(query_fence);
285             }
286             query_fence_val = query->fence_value;
287             mtx_unlock(&qpool->queries_lock);
288 
289             if (query_fence)
290                break;
291 
292             /* Check again in 10ms.
293              * FIXME: decrease the polling period if it happens to hurt latency.
294              */
295             os_time_sleep(10 * 1000);
296          }
297 
298          ID3D12Fence_SetEventOnCompletion(query_fence, query_fence_val, NULL);
299          ID3D12Fence_Release(query_fence);
300          available = UINT64_MAX;
301       } else {
302          ID3D12Fence *query_fence = NULL;
303          mtx_lock(&qpool->queries_lock);
304          if (query->fence) {
305             query_fence = query->fence;
306             ID3D12Fence_AddRef(query_fence);
307          }
308          uint64_t query_fence_val = query->fence_value;
309          mtx_unlock(&qpool->queries_lock);
310 
311          if (query_fence) {
312             if (ID3D12Fence_GetCompletedValue(query_fence) >= query_fence_val)
313                available = UINT64_MAX;
314             ID3D12Fence_Release(query_fence);
315          }
316       }
317 
318       if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
319          if (available)
320             memcpy(dst_ptr, src_ptr, step);
321          else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
322             memset(dst_ptr, 0, step);
323 
324          dst_ptr += step;
325       } else {
326          for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
327             if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
328                continue;
329 
330             if (available)
331                memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step);
332             else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
333                memset(dst_ptr, 0, step);
334 
335             dst_ptr += step;
336          }
337       }
338 
339       if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
340          memcpy(dst_ptr, &available, step);
341 
342       if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
343          result = VK_NOT_READY;
344    }
345 
346    return result;
347 }
348