1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "dzn_private.h"
25 #include "dzn_abi_helper.h"
26
27 #include "vk_alloc.h"
28 #include "vk_debug_report.h"
29 #include "vk_util.h"
30
31 #include "os_time.h"
32
33 static D3D12_QUERY_HEAP_TYPE
dzn_query_pool_get_heap_type(VkQueryType in)34 dzn_query_pool_get_heap_type(VkQueryType in)
35 {
36 switch (in) {
37 case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
38 case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
39 case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
40 default: unreachable("Unsupported query type");
41 }
42 }
43
44 D3D12_QUERY_TYPE
dzn_query_pool_get_query_type(const struct dzn_query_pool * qpool,VkQueryControlFlags flags)45 dzn_query_pool_get_query_type(const struct dzn_query_pool *qpool,
46 VkQueryControlFlags flags)
47 {
48 switch (qpool->heap_type) {
49 case D3D12_QUERY_HEAP_TYPE_OCCLUSION:
50 return flags & VK_QUERY_CONTROL_PRECISE_BIT ?
51 D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION;
52 case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
53 case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP;
54 default: unreachable("Unsupported query type");
55 }
56 }
57
58 static void
dzn_query_pool_destroy(struct dzn_query_pool * qpool,const VkAllocationCallbacks * alloc)59 dzn_query_pool_destroy(struct dzn_query_pool *qpool,
60 const VkAllocationCallbacks *alloc)
61 {
62 if (!qpool)
63 return;
64
65 struct dzn_device *device = container_of(qpool->base.device, struct dzn_device, vk);
66
67 if (qpool->collect_map)
68 ID3D12Resource_Unmap(qpool->collect_buffer, 0, NULL);
69
70 if (qpool->collect_buffer)
71 ID3D12Resource_Release(qpool->collect_buffer);
72
73 if (qpool->resolve_buffer)
74 ID3D12Resource_Release(qpool->resolve_buffer);
75
76 if (qpool->heap)
77 ID3D12QueryHeap_Release(qpool->heap);
78
79 for (uint32_t q = 0; q < qpool->query_count; q++) {
80 if (qpool->queries[q].fence)
81 ID3D12Fence_Release(qpool->queries[q].fence);
82 }
83
84 mtx_destroy(&qpool->queries_lock);
85 vk_object_base_finish(&qpool->base);
86 vk_free2(&device->vk.alloc, alloc, qpool);
87 }
88
89 static VkResult
dzn_query_pool_create(struct dzn_device * device,const VkQueryPoolCreateInfo * info,const VkAllocationCallbacks * alloc,VkQueryPool * out)90 dzn_query_pool_create(struct dzn_device *device,
91 const VkQueryPoolCreateInfo *info,
92 const VkAllocationCallbacks *alloc,
93 VkQueryPool *out)
94 {
95 VK_MULTIALLOC(ma);
96 VK_MULTIALLOC_DECL(&ma, struct dzn_query_pool, qpool, 1);
97 VK_MULTIALLOC_DECL(&ma, struct dzn_query, queries, info->queryCount);
98
99 if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc,
100 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
101 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
102
103 vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL);
104
105 mtx_init(&qpool->queries_lock, mtx_plain);
106 qpool->query_count = info->queryCount;
107 qpool->queries = queries;
108
109 D3D12_QUERY_HEAP_DESC desc = { 0 };
110 qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType);
111 desc.Count = info->queryCount;
112 desc.NodeMask = 0;
113
114 HRESULT hres =
115 ID3D12Device1_CreateQueryHeap(device->dev, &desc,
116 &IID_ID3D12QueryHeap,
117 (void **)&qpool->heap);
118 if (FAILED(hres)) {
119 dzn_query_pool_destroy(qpool, alloc);
120 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
121 }
122
123 switch (info->queryType) {
124 case VK_QUERY_TYPE_OCCLUSION:
125 case VK_QUERY_TYPE_TIMESTAMP:
126 qpool->query_size = sizeof(uint64_t);
127 break;
128 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
129 qpool->pipeline_statistics = info->pipelineStatistics;
130 qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
131 break;
132 default: unreachable("Unsupported query type");
133 }
134
135 D3D12_HEAP_PROPERTIES hprops =
136 dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, D3D12_HEAP_TYPE_DEFAULT);
137 D3D12_RESOURCE_DESC rdesc = {
138 .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
139 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
140 .Width = info->queryCount * qpool->query_size,
141 .Height = 1,
142 .DepthOrArraySize = 1,
143 .MipLevels = 1,
144 .Format = DXGI_FORMAT_UNKNOWN,
145 .SampleDesc = { .Count = 1, .Quality = 0 },
146 .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
147 .Flags = D3D12_RESOURCE_FLAG_NONE,
148 };
149
150 hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
151 D3D12_HEAP_FLAG_NONE,
152 &rdesc,
153 D3D12_RESOURCE_STATE_COPY_DEST,
154 NULL,
155 &IID_ID3D12Resource,
156 (void **)&qpool->resolve_buffer);
157 if (FAILED(hres)) {
158 dzn_query_pool_destroy(qpool, alloc);
159 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
160 }
161
162 hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0,
163 D3D12_HEAP_TYPE_READBACK);
164 rdesc.Width = info->queryCount * (qpool->query_size + sizeof(uint64_t));
165 hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
166 D3D12_HEAP_FLAG_NONE,
167 &rdesc,
168 D3D12_RESOURCE_STATE_COPY_DEST,
169 NULL,
170 &IID_ID3D12Resource,
171 (void **)&qpool->collect_buffer);
172 if (FAILED(hres)) {
173 dzn_query_pool_destroy(qpool, alloc);
174 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
175 }
176
177 hres = ID3D12Resource_Map(qpool->collect_buffer, 0, NULL, (void **)&qpool->collect_map);
178 if (FAILED(hres)) {
179 dzn_query_pool_destroy(qpool, alloc);
180 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
181 }
182
183 memset(qpool->collect_map, 0, rdesc.Width);
184
185 *out = dzn_query_pool_to_handle(qpool);
186 return VK_SUCCESS;
187 }
188
189 uint32_t
dzn_query_pool_get_result_offset(const struct dzn_query_pool * qpool,uint32_t query)190 dzn_query_pool_get_result_offset(const struct dzn_query_pool *qpool, uint32_t query)
191 {
192 return query * qpool->query_size;
193 }
194
195 uint32_t
dzn_query_pool_get_result_size(const struct dzn_query_pool * qpool,uint32_t query_count)196 dzn_query_pool_get_result_size(const struct dzn_query_pool *qpool, uint32_t query_count)
197 {
198 return query_count * qpool->query_size;
199 }
200
201 uint32_t
dzn_query_pool_get_availability_offset(const struct dzn_query_pool * qpool,uint32_t query)202 dzn_query_pool_get_availability_offset(const struct dzn_query_pool *qpool, uint32_t query)
203 {
204 return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query);
205 }
206
207 VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateQueryPool(VkDevice device,const VkQueryPoolCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkQueryPool * pQueryPool)208 dzn_CreateQueryPool(VkDevice device,
209 const VkQueryPoolCreateInfo *pCreateInfo,
210 const VkAllocationCallbacks *pAllocator,
211 VkQueryPool *pQueryPool)
212 {
213 return dzn_query_pool_create(dzn_device_from_handle(device),
214 pCreateInfo, pAllocator, pQueryPool);
215 }
216
217 VKAPI_ATTR void VKAPI_CALL
dzn_DestroyQueryPool(VkDevice device,VkQueryPool queryPool,const VkAllocationCallbacks * pAllocator)218 dzn_DestroyQueryPool(VkDevice device,
219 VkQueryPool queryPool,
220 const VkAllocationCallbacks *pAllocator)
221 {
222 dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator);
223 }
224
225 VKAPI_ATTR void VKAPI_CALL
dzn_ResetQueryPool(VkDevice device,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)226 dzn_ResetQueryPool(VkDevice device,
227 VkQueryPool queryPool,
228 uint32_t firstQuery,
229 uint32_t queryCount)
230 {
231 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
232
233 mtx_lock(&qpool->queries_lock);
234 for (uint32_t q = 0; q < queryCount; q++) {
235 struct dzn_query *query = &qpool->queries[firstQuery + q];
236
237 query->fence_value = 0;
238 if (query->fence) {
239 ID3D12Fence_Release(query->fence);
240 query->fence = NULL;
241 }
242 }
243 mtx_lock(&qpool->queries_lock);
244
245 memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery),
246 0, queryCount * qpool->query_size);
247 memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery),
248 0, queryCount * sizeof(uint64_t));
249 }
250
251 VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetQueryPoolResults(VkDevice device,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount,size_t dataSize,void * pData,VkDeviceSize stride,VkQueryResultFlags flags)252 dzn_GetQueryPoolResults(VkDevice device,
253 VkQueryPool queryPool,
254 uint32_t firstQuery,
255 uint32_t queryCount,
256 size_t dataSize,
257 void *pData,
258 VkDeviceSize stride,
259 VkQueryResultFlags flags)
260 {
261 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
262
263 uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ?
264 sizeof(uint64_t) : sizeof(uint32_t);
265 VkResult result = VK_SUCCESS;
266
267 for (uint32_t q = 0; q < queryCount; q++) {
268 struct dzn_query *query = &qpool->queries[q + firstQuery];
269
270 uint8_t *dst_ptr = (uint8_t *)pData + (stride * q);
271 uint8_t *src_ptr =
272 (uint8_t *)qpool->collect_map +
273 dzn_query_pool_get_result_offset(qpool, firstQuery + q);
274 uint64_t available = 0;
275
276 if (flags & VK_QUERY_RESULT_WAIT_BIT) {
277 ID3D12Fence *query_fence = NULL;
278 uint64_t query_fence_val = 0;
279
280 while (true) {
281 mtx_lock(&qpool->queries_lock);
282 if (query->fence) {
283 query_fence = query->fence;
284 ID3D12Fence_AddRef(query_fence);
285 }
286 query_fence_val = query->fence_value;
287 mtx_unlock(&qpool->queries_lock);
288
289 if (query_fence)
290 break;
291
292 /* Check again in 10ms.
293 * FIXME: decrease the polling period if it happens to hurt latency.
294 */
295 os_time_sleep(10 * 1000);
296 }
297
298 ID3D12Fence_SetEventOnCompletion(query_fence, query_fence_val, NULL);
299 ID3D12Fence_Release(query_fence);
300 available = UINT64_MAX;
301 } else {
302 ID3D12Fence *query_fence = NULL;
303 mtx_lock(&qpool->queries_lock);
304 if (query->fence) {
305 query_fence = query->fence;
306 ID3D12Fence_AddRef(query_fence);
307 }
308 uint64_t query_fence_val = query->fence_value;
309 mtx_unlock(&qpool->queries_lock);
310
311 if (query_fence) {
312 if (ID3D12Fence_GetCompletedValue(query_fence) >= query_fence_val)
313 available = UINT64_MAX;
314 ID3D12Fence_Release(query_fence);
315 }
316 }
317
318 if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
319 if (available)
320 memcpy(dst_ptr, src_ptr, step);
321 else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
322 memset(dst_ptr, 0, step);
323
324 dst_ptr += step;
325 } else {
326 for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
327 if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
328 continue;
329
330 if (available)
331 memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step);
332 else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
333 memset(dst_ptr, 0, step);
334
335 dst_ptr += step;
336 }
337 }
338
339 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
340 memcpy(dst_ptr, &available, step);
341
342 if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
343 result = VK_NOT_READY;
344 }
345
346 return result;
347 }
348