/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_format.h"
#include "vk_util.h"

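/* Execute the transition barriers queued in @barriers, skipping NOP entries
 * (StateBefore == StateAfter), which D3D12 forbids. Consecutive non-NOP
 * entries are flushed with a single ResourceBarrier() call, and each entry's
 * StateBefore is updated to StateAfter so the same barrier isn't executed
 * twice.
 */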
static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                        D3D12_RESOURCE_BARRIER *barriers,
                                        uint32_t barrier_count)
{
   uint32_t flush_count = 0;
   for (uint32_t b = 0; b < barrier_count; b++) {
      assert(barriers[b].Transition.pResource);

      /* some layouts map to the same states, and NOP-barriers are illegal */
      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
         if (flush_count) {
            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                       &barriers[b - flush_count]);
            flush_count = 0;
         }
      } else {
         flush_count++;
      }
   }

   if (flush_count)
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                 &barriers[barrier_count - flush_count]);

   /* Set Before = After so we don't execute the same barrier twice. */
   for (uint32_t b = 0; b < barrier_count; b++)
      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
}

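/* Execute the barriers previously queued for @res on the subresource range
 * [first_subres, first_subres + subres_count). No-op if nothing was queued
 * for this resource.
 */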
static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                         ID3D12Resource *res,
                                         uint32_t first_subres,
                                         uint32_t subres_count)
{
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
   D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;

   if (!barriers)
      return;

   dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
}

enum dzn_queue_transition_flags {
   DZN_QUEUE_TRANSITION_FLUSH = 1 << 0,
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1,
};

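/* Queue a state transition on a contiguous subresource range of @res. The
 * per-subresource barrier array (one entry per plane/level/layer) is
 * allocated lazily and tracked in the transition_barriers hash table; if a
 * subresource already has a pending barrier, the new state is chained by
 * rewriting StateAfter. With DZN_QUEUE_TRANSITION_FLUSH, the barriers are
 * executed immediately.
 */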
static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                         ID3D12Resource *res,
                                         uint32_t first_subres,
                                         uint32_t subres_count,
                                         D3D12_RESOURCE_STATES before,
                                         D3D12_RESOURCE_STATES after,
                                         uint32_t flags)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
   D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;

   if (!barriers) {
      D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
      D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
      ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
      uint32_t barrier_count =
         fmt_info.PlaneCount *
         desc.MipLevels * desc.DepthOrArraySize;

      barriers =
         vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
                   8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!barriers) {
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return cmdbuf->error;
      }

      he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
      if (!he) {
         vk_free(&cmdbuf->vk.pool->alloc, barriers);
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return cmdbuf->error;
      }
   }

   for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
      if (!barriers[subres].Transition.pResource) {
         barriers[subres] = (D3D12_RESOURCE_BARRIER) {
            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
            .Flags = 0,
            .Transition = {
               .pResource = res,
               .Subresource = subres,
               .StateBefore = before,
               .StateAfter = after,
            },
         };
      } else {
         if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
            before = barriers[subres].Transition.StateAfter;

         assert(barriers[subres].Transition.StateAfter == before ||
                barriers[subres].Transition.StateAfter == after);
         barriers[subres].Transition.StateAfter = after;
      }
   }

   if (flags & DZN_QUEUE_TRANSITION_FLUSH)
      dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);

   return VK_SUCCESS;
}

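/* Queue explicit before -> after state transitions for every subresource
 * selected by @range. Layers with contiguous subresource indices are
 * coalesced into a single dzn_cmd_buffer_queue_transition_barriers() call.
 */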
static VkResult
dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
                                                  const struct dzn_image *image,
                                                  const VkImageSubresourceRange *range,
                                                  D3D12_RESOURCE_STATES before,
                                                  D3D12_RESOURCE_STATES after,
                                                  uint32_t flags)
{
   uint32_t first_barrier = 0, barrier_count = 0;
   VkResult ret = VK_SUCCESS;

   dzn_foreach_aspect(aspect, range->aspectMask) {
      uint32_t layer_count = dzn_get_layer_count(image, range);
      uint32_t level_count = dzn_get_level_count(image, range);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
         if (!barrier_count) {
            first_barrier = subres;
            barrier_count = level_count;
            continue;
         } else if (first_barrier + barrier_count == subres) {
            barrier_count += level_count;
            continue;
         }

         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;

         barrier_count = 0;
      }

      if (barrier_count) {
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;
      }
   }

   return VK_SUCCESS;
}

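/* Layout-based variant of the above: the before/after D3D12 states are
 * derived from the Vulkan image layouts. UNDEFINED/PREINITIALIZED old
 * layouts map to the memory object's initial state, in which case the
 * pending barrier's StateAfter (if any) is reused as the actual before
 * state.
 */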
static VkResult
dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
                                                   const struct dzn_image *image,
                                                   const VkImageSubresourceRange *range,
                                                   VkImageLayout old_layout,
                                                   VkImageLayout new_layout,
                                                   uint32_t flags)
{
   uint32_t first_barrier = 0, barrier_count = 0;
   VkResult ret = VK_SUCCESS;

   if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
      flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;

   dzn_foreach_aspect(aspect, range->aspectMask) {
      D3D12_RESOURCE_STATES after =
         dzn_image_layout_to_state(image, new_layout, aspect);
      D3D12_RESOURCE_STATES before =
         (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
         image->mem->initial_state :
         dzn_image_layout_to_state(image, old_layout, aspect);

      uint32_t layer_count = dzn_get_layer_count(image, range);
      uint32_t level_count = dzn_get_level_count(image, range);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
         if (!barrier_count) {
            first_barrier = subres;
            barrier_count = level_count;
            continue;
         } else if (first_barrier + barrier_count == subres) {
            barrier_count += level_count;
            continue;
         }

         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;

         barrier_count = 0;
      }

      if (barrier_count) {
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;
      }
   }

   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
{
   if (!cbuf)
      return;

   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   if (cmdbuf->cmdlist)
      ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);

   if (cmdbuf->cmdalloc)
      ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);

   list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
      list_del(&res->link);
      ID3D12Resource_Release(res->res);
      vk_free(&cbuf->pool->alloc, res);
   }

   dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
   util_dynarray_fini(&cmdbuf->events.wait);
   util_dynarray_fini(&cmdbuf->events.signal);
   util_dynarray_fini(&cmdbuf->queries.reset);
   util_dynarray_fini(&cmdbuf->queries.wait);
   util_dynarray_fini(&cmdbuf->queries.signal);

   if (cmdbuf->rtvs.ht) {
      hash_table_foreach(cmdbuf->rtvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
   }

   if (cmdbuf->dsvs.ht) {
      hash_table_foreach(cmdbuf->dsvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
   }

   if (cmdbuf->events.ht)
      _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);

   if (cmdbuf->queries.ht) {
      hash_table_foreach(cmdbuf->queries.ht, he) {
         struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
         util_dynarray_fini(&qpstate->reset);
         util_dynarray_fini(&qpstate->collect);
         util_dynarray_fini(&qpstate->wait);
         util_dynarray_fini(&qpstate->signal);
         vk_free(&cbuf->pool->alloc, he->data);
      }
      _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
   }

   if (cmdbuf->transition_barriers) {
      hash_table_foreach(cmdbuf->transition_barriers, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
   }

   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&cbuf->pool->alloc, cmdbuf);
}

static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
}

static bool
dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
}

static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
}

static bool
dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
}

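/* Create a command buffer: initialize the vk_command_buffer base, internal
 * resource list, event/query arrays, descriptor-heap pools and lookup
 * tables, then create the D3D12 command allocator and command list matching
 * the pool's queue family type. Partially constructed objects are released
 * through dzn_cmd_buffer_destroy() on failure.
 */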
static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
                      VkCommandBuffer *out)
{
   VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
   struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   assert(pool->queue_family_index < pdev->queue_family_count);

   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;

   struct dzn_cmd_buffer *cmdbuf =
      vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      vk_command_buffer_init(&cmdbuf->vk, pool, info->level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->alloc, cmdbuf);
      return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   list_inithead(&cmdbuf->internal_bufs);
   util_dynarray_init(&cmdbuf->events.wait, NULL);
   util_dynarray_init(&cmdbuf->events.signal, NULL);
   util_dynarray_init(&cmdbuf->queries.reset, NULL);
   util_dynarray_init(&cmdbuf->queries.wait, NULL);
   util_dynarray_init(&cmdbuf->queries.signal, NULL);
   dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                 true, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                 true, &pool->alloc);

   cmdbuf->events.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->queries.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->transition_barriers =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->rtvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_rtv_key_hash_function,
                              dzn_cmd_buffer_rtv_key_equals_function);
   cmdbuf->dsvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_dsv_key_hash_function,
                              dzn_cmd_buffer_dsv_key_equals_function);
   if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
       !cmdbuf->transition_barriers ||
       !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   cmdbuf->vk.destroy = dzn_cmd_buffer_destroy;

   if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
                                                   &IID_ID3D12CommandAllocator,
                                                   (void **)&cmdbuf->cmdalloc))) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, type,
                                              cmdbuf->cmdalloc, NULL,
                                              &IID_ID3D12GraphicsCommandList1,
                                              (void **)&cmdbuf->cmdlist))) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

out:
   if (result != VK_SUCCESS)
      dzn_cmd_buffer_destroy(&cmdbuf->vk);
   else
      *out = dzn_cmd_buffer_to_handle(cmdbuf);

   return result;
}

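/* Bring a command buffer back to its initial state: release internal
 * buffers, drop all cached RTV/DSV/query/event/barrier state, and recycle
 * the D3D12 allocator and command list.
 */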
static VkResult
dzn_cmd_buffer_reset(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   const struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const struct vk_command_pool *pool = cmdbuf->vk.pool;

   /* Reset the state */
   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   /* TODO: Return resources to the pool */
   list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
      list_del(&res->link);
      ID3D12Resource_Release(res->res);
      vk_free(&cmdbuf->vk.pool->alloc, res);
   }

   cmdbuf->error = VK_SUCCESS;
   util_dynarray_clear(&cmdbuf->events.wait);
   util_dynarray_clear(&cmdbuf->events.signal);
   util_dynarray_clear(&cmdbuf->queries.reset);
   util_dynarray_clear(&cmdbuf->queries.wait);
   util_dynarray_clear(&cmdbuf->queries.signal);
   hash_table_foreach(cmdbuf->rtvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
   cmdbuf->null_rtv.ptr = 0;
   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
   hash_table_foreach(cmdbuf->dsvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
      util_dynarray_fini(&qpstate->reset);
      util_dynarray_fini(&qpstate->collect);
      util_dynarray_fini(&qpstate->wait);
      util_dynarray_fini(&qpstate->signal);
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   }
   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
   hash_table_foreach(cmdbuf->transition_barriers, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
   vk_command_buffer_reset(&cmdbuf->vk);

   /* cmdlist->Reset() doesn't return the memory back to the command list
    * allocator, and cmdalloc->Reset() can only be called if there's no live
    * cmdlist allocated from the allocator, so we need to release and create
    * a new command list.
    */
   ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
   cmdbuf->cmdlist = NULL;
   ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;
   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0,
                                              type,
                                              cmdbuf->cmdalloc, NULL,
                                              &IID_ID3D12GraphicsCommandList1,
                                              (void **)&cmdbuf->cmdlist))) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   return cmdbuf->error;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,
                           const VkCommandBufferAllocateInfo *pAllocateInfo,
                           VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(dzn_device, dev, device);
   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      result = dzn_cmd_buffer_create(pAllocateInfo,
                                     &pCommandBuffers[i]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
                                                i, pCommandBuffers);
      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
         pCommandBuffers[i] = VK_NULL_HANDLE;
   }

   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
                       VkCommandBufferResetFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   return dzn_cmd_buffer_reset(cmdbuf);
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* If this is the first vkBeginCommandBuffer, we must *initialize* the
    * command buffer's state. Otherwise, we must *reset* its state. In both
    * cases we reset it.
    *
    * From the Vulkan 1.0 spec:
    *
    *    If a command buffer is in the executable state and the command buffer
    *    was allocated from a command pool with the
    *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
    *    vkBeginCommandBuffer implicitly resets the command buffer, behaving
    *    as if vkResetCommandBuffer had been called with
    *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
    *    the command buffer in the recording state.
    */
   return dzn_cmd_buffer_reset(cmdbuf);
}

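/* Move the event states recorded in this command buffer into the
 * events.signal array, skipping events left in the
 * DZN_EVENT_STATE_EXTERNAL_WAIT state, then clear the tracking table.
 */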
static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (cmdbuf->error != VK_SUCCESS)
      goto out;

   hash_table_foreach(cmdbuf->events.ht, he) {
      enum dzn_event_state state = (uintptr_t)he->data;

      if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) {
         struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
         struct dzn_cmd_event_signal *entry =
            util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);

         if (!entry) {
            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            break;
         }

         *entry = signal;
      }
   }

out:
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
}

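/* The dynbitset helpers below implement a growable bitset on top of a
 * util_dynarray of BITSET_WORDs: reserving a bit beyond the current capacity
 * appends zero-initialized words, and tests/clears past the end are treated
 * as operations on unset bits.
 */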
static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return VK_SUCCESS;

   unsigned old_sz = array->size;
   void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
   if (!ptr) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      return cmdbuf->error;
   }

   memset(ptr, 0, array->size - old_sz);
   return VK_SUCCESS;
}

static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
{
   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;

   if (bit < nbits)
      return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);

   return false;
}

static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
   if (result != VK_SUCCESS)
      return result;

   BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return;

   BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
}

static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
                                   struct util_dynarray *array,
                                   uint32_t bit, uint32_t count)
{
   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
   if (result != VK_SUCCESS)
      return result;

   BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
                                     struct util_dynarray *array,
                                     uint32_t bit, uint32_t count)
{
   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;

   if (!nbits)
      return;

   uint32_t end = MIN2(bit + count, nbits) - 1;

   while (bit <= end) {
      uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
      BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
      bit += subcount;
   }
}

static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_query_pool_state *state =
      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
               8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!state) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   util_dynarray_init(&state->reset, NULL);
   util_dynarray_init(&state->collect, NULL);
   util_dynarray_init(&state->wait, NULL);
   util_dynarray_init(&state->signal, NULL);
   return state;
}

static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
                                        struct dzn_cmd_buffer_query_pool_state *state)
{
   util_dynarray_fini(&state->reset);
   util_dynarray_fini(&state->collect);
   util_dynarray_fini(&state->wait);
   util_dynarray_fini(&state->signal);
   vk_free(&cmdbuf->vk.pool->alloc, state);
}

static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
                                    struct dzn_query_pool *qpool)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_query_pool_state *state = NULL;
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->queries.ht, qpool);

   if (!he) {
      state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
      if (!state)
         return NULL;

      he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
      if (!he) {
         dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   } else {
      state = he->data;
   }

   return state;
}

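/* Resolve the queries flagged for collection in [first_query, first_query +
 * query_count): query data is resolved into the pool's resolve_buffer,
 * copied to the collect_buffer, and the matching availability slots are
 * filled with all-ones values copied from the device's refs buffer. Each
 * collected range is moved from the collect bitset to the signal bitset.
 */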
static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
                               const struct dzn_query_pool *qpool,
                               struct dzn_cmd_buffer_query_pool_state *state,
                               uint32_t first_query,
                               uint32_t query_count)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   if (!nbits)
      return VK_SUCCESS;

   query_count = MIN2(query_count, nbits - first_query);
   nbits = MIN2(first_query + query_count, nbits);

   VkResult result =
      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
   if (result != VK_SUCCESS)
      return result;

   dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);

   BITSET_WORD *collect =
      util_dynarray_element(&state->collect, BITSET_WORD, 0);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits);
        start < nbits;
        __bitset_next_range(&start, &end, collect, nbits)) {
      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
                                                  qpool->heap,
                                                  qpool->queries[start].type,
                                                  start, end - start,
                                                  qpool->resolve_buffer,
                                                  qpool->query_size * start);
   }

   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                            0, 1,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                               qpool->collect_buffer, offset,
                                               qpool->resolve_buffer, offset,
                                               size);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits);
        start < nbits;
        __bitset_next_range(&start, &end, collect, nbits)) {
      uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
      uint32_t count = end - start;

      for (unsigned i = 0; i < count; i += step) {
         uint32_t sub_count = MIN2(step, count - i);

         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                     qpool->collect_buffer,
                                                     dzn_query_pool_get_availability_offset(qpool, start + i),
                                                     device->queries.refs,
                                                     DZN_QUERY_REFS_ALL_ONES_OFFSET,
                                                     sizeof(uint64_t) * sub_count);
      }

      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
      dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count);
   }

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                            0, 1,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            0);
   return VK_SUCCESS;
}

static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
                                 struct dzn_query_pool *qpool,
                                 struct util_dynarray *bitset_array,
                                 struct util_dynarray *ops_array)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
   uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
      struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
      struct dzn_cmd_buffer_query_range *entry =
         util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);

      if (!entry) {
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return cmdbuf->error;
      }

      *entry = range;
   }

   return VK_SUCCESS;
}

static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
{
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
      struct dzn_cmd_buffer_query_pool_state *state = he->data;
      VkResult result =
         dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      dzn_cmd_buffer_gather_events(cmdbuf);
      dzn_cmd_buffer_gather_queries(cmdbuf);
      HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
      if (FAILED(hres))
         cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
   } else {
      cmdbuf->error = cmdbuf->vk.cmd_queue.error;
   }

   return cmdbuf->error;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   bool execution_barrier =
      !info->memoryBarrierCount &&
      !info->bufferMemoryBarrierCount &&
      !info->imageMemoryBarrierCount;

   if (execution_barrier) {
      /* An execution barrier can be emulated with a NULL UAV barrier (AKA
       * pipeline flush). That's the best we can do with the standard D3D12
       * barrier API.
       */
      D3D12_RESOURCE_BARRIER barrier = {
         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
         .UAV = { .pResource = NULL },
      };

      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
   }

   /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
    * Scopes are not taken into account, but that's inherent to the current
    * D3D12 barrier API.
    */
   if (info->memoryBarrierCount) {
      D3D12_RESOURCE_BARRIER barriers[2] = { 0 };

      barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
      barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[0].UAV.pResource = NULL;
      barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
      barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[1].Aliasing.pResourceBefore = NULL;
      barriers[1].Aliasing.pResourceAfter = NULL;
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
   }

   for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
      VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
      D3D12_RESOURCE_BARRIER barrier = { 0 };

      /* UAVs are only used for storage buffers; skip all other buffers. */
      if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
         continue;

      barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
      barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barrier.UAV.pResource = buf->res;
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
   }

   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
      const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
      VK_FROM_HANDLE(dzn_image, image, ibarrier->image);

      /* We use the placed-resource simple model, in which only one resource
       * pointing to a given heap is active at a given time. To make the
       * resource active we need to add an aliasing barrier.
       */
      D3D12_RESOURCE_BARRIER aliasing_barrier = {
         .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING,
         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
         .Aliasing = {
            .pResourceBefore = NULL,
            .pResourceAfter = image->res,
         },
      };

      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &aliasing_barrier);

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         ibarrier->oldLayout,
                                                         ibarrier->newLayout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

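/* Return a cached DSV for the given image/view-desc pair, creating it from
 * the command buffer's DSV descriptor-heap pool on first use. The cache
 * lives until the command buffer is reset or destroyed.
 */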
static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
                       const struct dzn_image *image,
                       const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_dsv_key key = { image, *desc };
   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
   struct dzn_cmd_buffer_dsv_entry *dsve;

   if (!he) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;

      // TODO: error handling
      dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      dsve->key = key;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
      dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
      ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
      _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
   } else {
      dsve = he->data;
   }

   return dsve->handle;
}

static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
                       const struct dzn_image *image,
                       const D3D12_RENDER_TARGET_VIEW_DESC *desc)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_rtv_key key = { image, *desc };
   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
   struct dzn_cmd_buffer_rtv_entry *rtve;

   if (!he) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;

      // TODO: error handling
      rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      rtve->key = key;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
      rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
      ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
      he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
   } else {
      rtve = he->data;
   }

   return rtve->handle;
}

static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (!cmdbuf->null_rtv.ptr) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
      cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);

      D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
      desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
      desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
      desc.Texture2D.MipSlice = 0;
      desc.Texture2D.PlaneSlice = 0;

      ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
   }

   return cmdbuf->null_rtv;
}

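/* Allocate a committed buffer resource whose lifetime is tied to the command
 * buffer: the resource is tracked in internal_bufs and released on reset or
 * destroy.
 */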
static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
                                  uint32_t size,
                                  D3D12_HEAP_TYPE heap_type,
                                  D3D12_RESOURCE_STATES init_state,
                                  ID3D12Resource **out)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   ID3D12Resource *res;
   *out = NULL;

   /* Align size on 64k (the default alignment) */
   size = ALIGN_POT(size, 64 * 1024);

   D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, heap_type);
   D3D12_RESOURCE_DESC rdesc = {
      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
      .Width = size,
      .Height = 1,
      .DepthOrArraySize = 1,
      .MipLevels = 1,
      .Format = DXGI_FORMAT_UNKNOWN,
      .SampleDesc = { .Count = 1, .Quality = 0 },
      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
      .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
   };

   HRESULT hres =
      ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
                                            D3D12_HEAP_FLAG_NONE, &rdesc,
                                            init_state, NULL,
                                            &IID_ID3D12Resource,
                                            (void **)&res);
   if (FAILED(hres)) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      return cmdbuf->error;
   }

   struct dzn_internal_resource *entry =
      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!entry) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      ID3D12Resource_Release(res);
      return cmdbuf->error;
   }

   entry->res = res;
   list_addtail(&entry->link, &cmdbuf->internal_bufs);
   *out = entry->res;
   return VK_SUCCESS;
}

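/* Clear rects of an image by copying from a staging buffer filled with the
 * clear color. This is the fallback used when a clear value can't go through
 * ClearRenderTargetView(), e.g. integer values that aren't exactly
 * representable once converted to float.
 */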
static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                     const struct dzn_image *image,
                                     VkImageLayout layout,
                                     const VkClearColorValue *color,
                                     const VkImageSubresourceRange *range,
                                     uint32_t rect_count, D3D12_RECT *rects)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
   uint32_t res_size = max_h * row_pitch;

   assert(fill_step <= sizeof(buf));

   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        D3D12_HEAP_TYPE_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        &src_res);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = 0,
         .Footprint = {
            .Width = max_w,
            .Height = max_h,
            .Depth = 1,
            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
         },
      },
   };

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
                                            D3D12_RESOURCE_STATE_GENERIC_READ,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                      layout,
                                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                      DZN_QUEUE_TRANSITION_FLUSH);

   assert(dzn_get_level_count(image, range) == 1);
   uint32_t layer_count = dzn_get_layer_count(image, range);

   dzn_foreach_aspect(aspect, range->aspectMask) {
      VkImageSubresourceLayers subres = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .mipLevel = range->baseMipLevel,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = layer_count,
      };

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         D3D12_TEXTURE_COPY_LOCATION dst_loc =
            dzn_image_get_copy_loc(image, &subres, aspect, layer);

         src_loc.PlacedFootprint.Footprint.Format =
            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
            dst_loc.PlacedFootprint.Footprint.Format :
            image->desc.Format;

         for (uint32_t r = 0; r < rect_count; r++) {
            D3D12_BOX src_box = {
               .left = 0,
               .top = 0,
               .front = 0,
               .right = (UINT)(rects[r].right - rects[r].left),
               .bottom = (UINT)(rects[r].bottom - rects[r].top),
               .back = 1,
            };

            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
                                                         &dst_loc,
                                                         rects[r].left,
                                                         rects[r].top, 0,
                                                         &src_loc,
                                                         &src_box);
         }
      }
   }

   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                      layout,
                                                      DZN_QUEUE_TRANSITION_FLUSH);
}

static VkClearColorValue
adjust_clear_color(VkFormat format, const VkClearColorValue *col)
{
   VkClearColorValue out = *col;

   // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
   // manually where it matters, like here, in the clear path.
   if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
      DZN_SWAP(float, out.float32[0], out.float32[1]);
      DZN_SWAP(float, out.float32[2], out.float32[3]);
   }

   return out;
}

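/* Range-based variant of the copy clear above: fills whole subresource
 * ranges (all selected levels/layers/aspects) instead of a list of rects,
 * with the staging buffer sized for the biggest mip level involved.
 */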
static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                      const struct dzn_image *image,
                                      VkImageLayout layout,
                                      const VkClearColorValue *color,
                                      uint32_t range_count,
                                      const VkImageSubresourceRange *ranges)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   uint32_t res_size = 0;
   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);

      res_size = MAX2(res_size, h * d * row_pitch);
   }

   assert(fill_step <= sizeof(buf));

   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        D3D12_HEAP_TYPE_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        &src_res);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = 0,
      },
   };

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
                                            D3D12_RESOURCE_STATE_GENERIC_READ,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                         layout,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);

      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
            VkImageSubresourceLayers subres = {
               .aspectMask = (VkImageAspectFlags)aspect,
               .mipLevel = ranges[r].baseMipLevel + lvl,
               .baseArrayLayer = ranges[r].baseArrayLayer,
               .layerCount = layer_count,
            };

            for (uint32_t layer = 0; layer < layer_count; layer++) {
               D3D12_TEXTURE_COPY_LOCATION dst_loc =
                  dzn_image_get_copy_loc(image, &subres, aspect, layer);

               src_loc.PlacedFootprint.Footprint.Format =
                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
                  dst_loc.PlacedFootprint.Footprint.Format :
                  image->desc.Format;
               src_loc.PlacedFootprint.Footprint.Width = w;
               src_loc.PlacedFootprint.Footprint.Height = h;
               src_loc.PlacedFootprint.Footprint.Depth = d;
               src_loc.PlacedFootprint.Footprint.RowPitch =
                  ALIGN_NPOT(w * blksize, fill_step);
               D3D12_BOX src_box = {
                  .left = 0,
                  .top = 0,
                  .front = 0,
                  .right = w,
                  .bottom = h,
                  .back = d,
               };

               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
                                                            &src_loc, &src_box);
            }
         }
      }

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

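/* Clear a single attachment view: depth/stencil aspects go through
 * ClearDepthStencilView(), color aspects through ClearRenderTargetView(),
 * with a copy-based fallback for integer clear values that would be mangled
 * by the float conversion.
 */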
static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
                                struct dzn_image_view *view,
                                VkImageLayout layout,
                                const VkClearValue *value,
                                VkImageAspectFlags aspects,
                                uint32_t base_layer,
                                uint32_t layer_count,
                                uint32_t rect_count,
                                D3D12_RECT *rects)
{
   struct dzn_image *image =
      container_of(view->vk.image, struct dzn_image, vk);

   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = view->vk.base_mip_level,
      .levelCount = 1,
      .baseArrayLayer = view->vk.base_array_layer + base_layer,
      .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
                    view->vk.layer_count - base_layer : layer_count,
   };

   layer_count = vk_image_subresource_layer_count(&image->vk, &range);

   if (vk_format_is_depth_or_stencil(view->vk.format)) {
      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      if (flags != 0) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);

         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
                                                          value->depthStencil.depth,
                                                          value->depthStencil.stencil,
                                                          rect_count, rects);

         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkClearColorValue color = adjust_clear_color(view->vk.format, &value->color);
      bool clear_with_cpy = false;
      float vals[4];

      if (vk_format_is_sint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.int32[i];
            if (color.int32[i] != (int32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else if (vk_format_is_uint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.uint32[i];
            if (color.uint32[i] != (uint32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else {
         for (uint32_t i = 0; i < 4; i++)
            vals[i] = color.float32[i];
      }

      if (clear_with_cpy) {
         dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
                                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                              &value->color,
                                              &range, rect_count, rects);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);

         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}

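/* Clear color subresource ranges through RTVs when the image is renderable
 * and the clear value survives the round-trip to float; otherwise fall back
 * to the copy-based clear.
 */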
static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
                           const struct dzn_image *image,
                           VkImageLayout layout,
                           const VkClearColorValue *col,
                           uint32_t range_count,
                           const VkImageSubresourceRange *ranges)
{
   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
      return;
   }

   VkClearColorValue color = adjust_clear_color(image->vk.format, col);
   float clear_vals[4];

   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);

   if (util_format_is_pure_sint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.int32[c];
         if (color.int32[c] != (int32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else if (util_format_is_pure_uint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.uint32[c];
         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else {
      memcpy(clear_vals, color.float32, sizeof(clear_vals));
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         VkImageSubresourceRange view_range = *range;

         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            view_range.baseArrayLayer = 0;
            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
      }

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

1534 static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearDepthStencilValue * zs,uint32_t range_count,const VkImageSubresourceRange * ranges)1535 dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
1536 const struct dzn_image *image,
1537 VkImageLayout layout,
1538 const VkClearDepthStencilValue *zs,
1539 uint32_t range_count,
1540 const VkImageSubresourceRange *ranges)
1541 {
1542 assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
1543
1544 for (uint32_t r = 0; r < range_count; r++) {
1545 const VkImageSubresourceRange *range = &ranges[r];
1546 uint32_t level_count = dzn_get_level_count(image, range);
1547
1548 D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
1549
1550 if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
1551 flags |= D3D12_CLEAR_FLAG_DEPTH;
1552 if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
1553 flags |= D3D12_CLEAR_FLAG_STENCIL;
1554
1555 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1556 layout,
1557 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1558 DZN_QUEUE_TRANSITION_FLUSH);
1559
1560 for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1561 D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
1562 D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
1563 ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
1564 handle, flags,
1565 zs->depth,
1566 zs->stencil,
1567 0, NULL);
1568 }
1569
1570 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1571 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1572 layout,
1573 DZN_QUEUE_TRANSITION_FLUSH);
1574 }
1575 }
1576
1577 static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyBufferToImageInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)1578 dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
1579 const VkCopyBufferToImageInfo2 *info,
1580 uint32_t r,
1581 VkImageAspectFlagBits aspect,
1582 uint32_t l)
1583 {
1584 VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
1585 VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
1586
1587 ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1588
1589 VkBufferImageCopy2 region = info->pRegions[r];
1590 enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
1591 uint32_t blkh = util_format_get_blockheight(pfmt);
1592 uint32_t blkd = util_format_get_blockdepth(pfmt);
1593
1594 /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
1595 * to not be block aligned if it's reaching the image boundary, offsets still
1596 * have to be aligned. Align the image extent to make D3D12 happy.
1597 */
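   /* For example, with a BC1 image (4x4 blocks), a 30x30 copy touching the
    * right/bottom edge gets its extent rounded up to 32x32 so the last
    * partial blocks are fully covered.
    */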
   dzn_image_align_extent(dst_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
      dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(&src_buf_loc)) {
      /* RowPitch and Offset are properly aligned: we can copy
       * the whole thing in one call.
       */
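      /* "Properly aligned" refers to the buffer-footprint rules: RowPitch
       * must be a multiple of D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256) and
       * Offset a multiple of D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512).
       */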
      D3D12_BOX src_box = {
         .left = 0,
         .top = 0,
         .front = 0,
         .right = region.imageExtent.width,
         .bottom = region.imageExtent.height,
         .back = region.imageExtent.depth,
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
                                                   region.imageOffset.x,
                                                   region.imageOffset.y,
                                                   region.imageOffset.z,
                                                   &src_buf_loc, &src_box);
      return;
   }

   /* Copy line-by-line (one block row at a time) if things are not properly
    * aligned.
    */
   D3D12_BOX src_box = {
      .top = 0,
      .front = 0,
      .bottom = blkh,
      .back = blkd,
   };

   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t src_x;

         D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
            dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
                                         &region, &src_buf_loc,
                                         y, z, &src_x);

         src_box.left = src_x;
         src_box.right = src_x + region.imageExtent.width;
         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_img_loc,
                                                      region.imageOffset.x,
                                                      region.imageOffset.y + y,
                                                      region.imageOffset.z + z,
                                                      &src_buf_line_loc,
                                                      &src_box);
      }
   }
}

static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyImageToBufferInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
   VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block-aligned offsets and extents. Vulkan allows an
    * unaligned extent when the copy reaches the image boundary, but offsets
    * still have to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(src_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION src_img_loc =
      dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
      dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(&dst_buf_loc)) {
      /* RowPitch and Offset are properly aligned (256-byte pitch, 512-byte
       * placement): we can copy the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = (UINT)region.imageOffset.x,
         .top = (UINT)region.imageOffset.y,
         .front = (UINT)region.imageOffset.z,
         .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
         .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
         .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
                                                   0, 0, 0, &src_img_loc,
                                                   &src_box);
      return;
   }

   D3D12_BOX src_box = {
      .left = (UINT)region.imageOffset.x,
      .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
   };

   /* Copy line-by-line if things are not properly aligned. */
   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      src_box.front = region.imageOffset.z + z;
      src_box.back = src_box.front + blkd;

      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t dst_x;

         D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
            dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
                                         &region, &dst_buf_loc,
                                         y, z, &dst_x);

         src_box.top = region.imageOffset.y + y;
         src_box.bottom = src_box.top + blkh;

         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_buf_line_loc,
                                                      dst_x, 0, 0,
                                                      &src_img_loc,
                                                      &src_box);
      }
   }
}

static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
                              const VkCopyImageInfo2 *info,
                              D3D12_RESOURCE_DESC *tmp_desc,
                              D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
                              uint32_t r,
                              VkImageAspectFlagBits aspect,
                              uint32_t l)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   ID3D12Device2 *dev = device->dev;
   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkImageCopy2 region = info->pRegions[r];
   dzn_image_align_extent(src, &region.extent);

   const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
   const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
   VkFormat src_format =
      dzn_image_get_plane_format(src->vk.format, aspect);
   VkFormat dst_format =
      dzn_image_get_plane_format(dst->vk.format, aspect);

   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
   uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
   uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
   uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
   uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
   uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
   uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
   uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
   uint32_t depth = region.extent.depth;
   uint32_t dst_l = l, src_l = l;

   assert(src_subres->aspectMask == dst_subres->aspectMask);
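
   /* Vulkan allows 2D <-> 3D copies where array layers on the 2D side map
    * to depth slices on the 3D side. The caller iterates over layers (l),
    * so fold l into the 3D side's z offset and copy one slice per call.
    */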
   if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
       dst->vk.image_type == VK_IMAGE_TYPE_2D) {
      assert(src_subres->layerCount == 1);
      src_l = 0;
      src_z += l;
      depth = 1;
   } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
              dst->vk.image_type == VK_IMAGE_TYPE_3D) {
      assert(dst_subres->layerCount == 1);
      dst_l = 0;
      dst_z += l;
      depth = 1;
   } else {
      assert(src_subres->layerCount == dst_subres->layerCount);
   }

   D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
   D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);

   D3D12_BOX src_box = {
      .left = (UINT)MAX2(region.srcOffset.x, 0),
      .top = (UINT)MAX2(region.srcOffset.y, 0),
      .front = (UINT)MAX2(src_z, 0),
      .right = (UINT)region.srcOffset.x + region.extent.width,
      .bottom = (UINT)region.srcOffset.y + region.extent.height,
      .back = (UINT)src_z + depth,
   };

   if (!tmp_loc->pResource) {
      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                   region.dstOffset.x,
                                                   region.dstOffset.y,
                                                   dst_z, &src_loc,
                                                   &src_box);
      return;
   }
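
   /* Indirect path: the caller provided a temporary resource (needed when
    * source and destination formats can't be copied directly). Copy
    * src -> tmp using the source footprint, then tmp -> dst using the
    * destination footprint, flipping tmp between COPY_SOURCE and COPY_DEST
    * in between.
    */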
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(src->vk.format, aspect);
   tmp_desc->Width = region.extent.width;
   tmp_desc->Height = region.extent.height;

   ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   tmp_loc->PlacedFootprint.Footprint.Depth = depth;

   if (r > 0 || l > 0) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);

   if (r > 0 || l > 0) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
   if (src_blkw != dst_blkw)
      tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
   if (src_blkh != dst_blkh)
      tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;

   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   if (src_blkd != dst_blkd) {
      tmp_loc->PlacedFootprint.Footprint.Depth =
         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
   } else {
      tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
   }

   D3D12_BOX tmp_box = {
      .left = 0,
      .top = 0,
      .front = 0,
      .right = tmp_loc->PlacedFootprint.Footprint.Width,
      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
   };

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                region.dstOffset.x,
                                                region.dstOffset.y,
                                                dst_z,
                                                tmp_loc, &tmp_box);
}

static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
                                     VkImage image,
                                     VkImageAspectFlagBits aspect,
                                     const VkImageSubresourceLayers *subres,
                                     struct dzn_descriptor_heap *heap,
                                     uint32_t heap_slot)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   VK_FROM_HANDLE(dzn_image, img, image);
   VkImageViewCreateInfo iview_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = image,
      .format = img->vk.format,
      .subresourceRange = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = subres->baseArrayLayer,
         .layerCount = subres->layerCount,
      },
   };

   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
      iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
      iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
      iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
      iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
   } else if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
      iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
      iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
      iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
      iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
   }

   switch (img->vk.image_type) {
   case VK_IMAGE_TYPE_1D:
      iview_info.viewType = img->vk.array_layers > 1 ?
                            VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
      break;
   case VK_IMAGE_TYPE_2D:
      iview_info.viewType = img->vk.array_layers > 1 ?
                            VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
      break;
   case VK_IMAGE_TYPE_3D:
      iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
      break;
   default:
      unreachable("Invalid type");
   }

   struct dzn_image_view iview;
   dzn_image_view_init(device, &iview, &iview_info);
   dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview);
   dzn_image_view_finish(&iview);

   D3D12_GPU_DESCRIPTOR_HANDLE handle =
      dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
   ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
}

static void
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
                                     struct dzn_image *img,
                                     VkImageAspectFlagBits aspect,
                                     uint32_t level, uint32_t layer)
{
   bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
   VkImageSubresourceRange range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = level,
      .levelCount = 1,
      .baseArrayLayer = layer,
      .layerCount = 1,
   };

   if (ds) {
      D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
      D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
      ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, TRUE, &handle);
   } else {
      D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
      D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
      ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, FALSE, NULL);
   }
}

static void
dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
                                 const struct dzn_image *src,
                                 const struct dzn_image *dst,
                                 VkImageAspectFlagBits aspect,
                                 VkFilter filter, bool resolve)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
   VkImageUsageFlags usage =
      vk_format_is_depth_or_stencil(dst->vk.format) ?
      VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
   struct dzn_meta_blit_key ctx_key = {
      .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect),
      .samples = (uint32_t)src->vk.samples,
      .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
                        FRAG_RESULT_DEPTH :
                        aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
                        FRAG_RESULT_STENCIL :
                        FRAG_RESULT_DATA0),
      .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
                             util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
                             aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
                             GLSL_TYPE_FLOAT),
      .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
                                src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
                                src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
                                GLSL_SAMPLER_DIM_3D),
      .src_is_array = src->vk.array_layers > 1,
      .resolve = resolve,
      .linear_filter = filter == VK_FILTER_LINEAR,
      .padding = 0,
   };

   const struct dzn_meta_blit *ctx =
      dzn_meta_blits_get_context(device, &ctx_key);
   assert(ctx);

   ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
}

static void
dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
                                  const struct dzn_image *src,
                                  const VkImageSubresourceLayers *src_subres,
                                  const VkOffset3D *src_offsets,
                                  const struct dzn_image *dst,
                                  const VkImageSubresourceLayers *dst_subres,
                                  const VkOffset3D *dst_offsets,
                                  bool normalize_src_coords)
{
   uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
   uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
   uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
   uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
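
   /* Map framebuffer coordinates to NDC: ndc_x = 2 * x / width - 1 and
    * ndc_y = -(2 * y / height - 1); the y flip accounts for NDC y pointing
    * up while Vulkan framebuffer y points down.
    */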
   float dst_pos[4] = {
      (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
      (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
   };

   float src_pos[4] = {
      (float)src_offsets[0].x, (float)src_offsets[0].y,
      (float)src_offsets[1].x, (float)src_offsets[1].y,
   };

   if (normalize_src_coords) {
      src_pos[0] /= src_w;
      src_pos[1] /= src_h;
      src_pos[2] /= src_w;
      src_pos[3] /= src_h;
   }

   float coords[] = {
      dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
      dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
      dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
      dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
   };
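
   /* Four (dst.xy, src.xy) vertices laid out for the 4-vertex strip draw
    * (top-left, top-right, bottom-left, bottom-right) that the blit and
    * resolve loops issue via DrawInstanced(4, 1, 0, 0).
    */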

   ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);

   D3D12_VIEWPORT vp = {
      .TopLeftX = 0,
      .TopLeftY = 0,
      .Width = (float)dst_w,
      .Height = (float)dst_h,
      .MinDepth = 0,
      .MaxDepth = 1,
   };
   ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);

   D3D12_RECT scissor = {
      .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
      .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
      .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
      .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
   };
   ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
}

static void
dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
                                   struct dzn_image *src, VkImageLayout src_layout,
                                   const VkImageSubresourceLayers *src_subres,
                                   struct dzn_image *dst, VkImageLayout dst_layout,
                                   const VkImageSubresourceLayers *dst_subres,
                                   VkImageAspectFlagBits aspect,
                                   bool post)
{
   VkImageSubresourceRange src_range = {
      .aspectMask = src_subres->aspectMask,
      .baseMipLevel = src_subres->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = src_subres->baseArrayLayer,
      .layerCount = src_subres->layerCount,
   };
   VkImageSubresourceRange dst_range = {
      .aspectMask = dst_subres->aspectMask,
      .baseMipLevel = dst_subres->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = dst_subres->baseArrayLayer,
      .layerCount = dst_subres->layerCount,
   };
   /* The source is sampled and the destination is rendered to, so move them
    * to the matching shader-read/attachment layouts around the blit.
    */
   VkImageLayout dst_blit_layout =
      (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
      VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL :
      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

   if (!post) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
                                                         src_layout,
                                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
                                                         dst_layout,
                                                         dst_blit_layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   } else {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
                                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                                         src_layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
                                                         dst_blit_layout,
                                                         dst_layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

static void
dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
                           const VkBlitImageInfo2 *info,
                           struct dzn_descriptor_heap *heap,
                           uint32_t *heap_slot,
                           uint32_t r)
{
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   const VkImageBlit2 *region = &info->pRegions[r];
   bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
   bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;

   dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
      dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false);
      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
                                         src, info->srcImageLayout, &region->srcSubresource,
                                         dst, info->dstImageLayout, &region->dstSubresource,
                                         aspect, false);
      dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
                                           aspect, &region->srcSubresource,
                                           heap, (*heap_slot)++);
      dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
                                        src, &region->srcSubresource, region->srcOffsets,
                                        dst, &region->dstSubresource, region->dstOffsets,
                                        src->vk.samples == 1);

      uint32_t dst_depth =
         region->dstOffsets[1].z > region->dstOffsets[0].z ?
         region->dstOffsets[1].z - region->dstOffsets[0].z :
         region->dstOffsets[0].z - region->dstOffsets[1].z;
      uint32_t src_depth =
         region->srcOffsets[1].z > region->srcOffsets[0].z ?
         region->srcOffsets[1].z - region->srcOffsets[0].z :
         region->srcOffsets[0].z - region->srcOffsets[1].z;

      uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
      uint32_t dst_level = region->dstSubresource.mipLevel;

      float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
      if (region->srcOffsets[0].z > region->srcOffsets[1].z)
         src_slice_step = -src_slice_step;
      float src_z_coord =
         src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
      uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
      uint32_t dst_z_coord =
         dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
      if (region->dstOffsets[0].z > region->dstOffsets[1].z)
         dst_z_coord--;

      uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
                                1 : -1;

      /* Normalize the src coordinates/step */
      if (src_is_3d) {
         src_z_coord /= src->vk.extent.depth;
         src_slice_step /= src->vk.extent.depth;
      }
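
      /* src_z_coord walks the source depth range in slice-center steps: it
       * starts at srcOffsets[0].z + step / 2 and advances by
       * src_depth / dst_depth per destination slice, so each destination
       * slice samples the middle of its source slab. For 3D sources both
       * the coordinate and the step were normalized to [0, 1] just above.
       */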

      for (uint32_t slice = 0; slice < slice_count; slice++) {
         dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
         ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
         src_z_coord += src_slice_step;
         dst_z_coord += dst_slice_step;
      }

      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
                                         src, info->srcImageLayout, &region->srcSubresource,
                                         dst, info->dstImageLayout, &region->dstSubresource,
                                         aspect, true);
   }
}

static void
dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
                              const VkResolveImageInfo2 *info,
                              struct dzn_descriptor_heap *heap,
                              uint32_t *heap_slot,
                              uint32_t r)
{
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   const VkImageResolve2 *region = &info->pRegions[r];

   dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
      dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true);
      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
                                         src, info->srcImageLayout, &region->srcSubresource,
                                         dst, info->dstImageLayout, &region->dstSubresource,
                                         aspect, false);
      dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
                                           &region->srcSubresource,
                                           heap, (*heap_slot)++);

      VkOffset3D src_offset[2] = {
         {
            .x = region->srcOffset.x,
            .y = region->srcOffset.y,
         },
         {
            .x = (int32_t)(region->srcOffset.x + region->extent.width),
            .y = (int32_t)(region->srcOffset.y + region->extent.height),
         },
      };
      VkOffset3D dst_offset[2] = {
         {
            .x = region->dstOffset.x,
            .y = region->dstOffset.y,
         },
         {
            .x = (int32_t)(region->dstOffset.x + region->extent.width),
            .y = (int32_t)(region->dstOffset.y + region->extent.height),
         },
      };

      dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
                                        src, &region->srcSubresource, src_offset,
                                        dst, &region->dstSubresource, dst_offset,
                                        false);

      uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         float src_z_coord = layer;

         dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
                                              dst, aspect, region->dstSubresource.mipLevel,
                                              region->dstSubresource.baseArrayLayer + layer);
         ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
      }

      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
                                         src, info->srcImageLayout, &region->srcSubresource,
                                         dst, info->dstImageLayout, &region->dstSubresource,
                                         aspect, true);
   }
}

static void
dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;

   if (!pipeline)
      return;

   ID3D12PipelineState *old_pipeline_state =
      cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;

   if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
         struct dzn_graphics_pipeline *gfx =
            (struct dzn_graphics_pipeline *)pipeline;
         ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
         ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
         dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
      } else {
         ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
      }
   }

   ID3D12PipelineState *new_pipeline_state = pipeline->state;

   if (old_pipeline_state != new_pipeline_state) {
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
      cmdbuf->state.pipeline = pipeline;
   }
}

static void
dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_descriptor_state *desc_state =
      &cmdbuf->state.bindpoint[bindpoint].desc_state;
   struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
   };
   uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
   bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
   const struct dzn_pipeline *pipeline =
      cmdbuf->state.bindpoint[bindpoint].pipeline;

   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS))
      goto set_heaps;
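
   /* D3D12 only allows one CBV_SRV_UAV heap and one SAMPLER heap to be
    * bound at a time, so bound descriptor sets are copied into the command
    * buffer's pool heaps and the root descriptor tables are repointed at
    * the fresh copies.
    */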
   dzn_foreach_pool_type (type) {
      uint32_t desc_count = pipeline->desc_count[type];
      if (!desc_count)
         continue;

      struct dzn_descriptor_heap_pool *pool =
         type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
         &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
      struct dzn_descriptor_heap *dst_heap = NULL;
      uint32_t dst_heap_offset = 0;

      dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
                                           &dst_heap, &dst_heap_offset);
      new_heap_offsets[type] = dst_heap_offset;
      update_root_desc_table[type] = true;

      for (uint32_t s = 0; s < MAX_SETS; s++) {
         const struct dzn_descriptor_set *set = desc_state->sets[s].set;
         if (!set) continue;

         uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
         uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type];
         if (set_desc_count) {
            mtx_lock(&set->pool->defragment_lock);
            dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset,
                                     &set->pool->heaps[type], set->heap_offsets[type],
                                     set_desc_count);
            mtx_unlock(&set->pool->defragment_lock);
         }

         if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
            uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
            for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
               uint32_t desc_heap_offset =
                  pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv;
               struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
               bdesc.offset += desc_state->sets[s].dynamic_offsets[o];

               dzn_descriptor_heap_write_buffer_desc(dst_heap,
                                                     dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                     false, &bdesc);

               if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) {
                  desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav;
                  dzn_descriptor_heap_write_buffer_desc(dst_heap,
                                                        dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                        true, &bdesc);
               }
            }
         }
      }

      new_heaps[type] = dst_heap;
   }

set_heaps:
   if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
       new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
      ID3D12DescriptorHeap *desc_heaps[2];
      uint32_t num_desc_heaps = 0;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);

      for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
         cmdbuf->state.heaps[h] = new_heaps[h];
   }

   for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
      D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];

      if (!update_root_desc_table[type])
         continue;

      D3D12_GPU_DESCRIPTOR_HANDLE handle =
         dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);

      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
         ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
      else
         ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
   }
}

static void
dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
      return;

   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
   uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;

   if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
                                                               sizeof(cmdbuf->state.sysvals.gfx) / 4,
                                                               &cmdbuf->state.sysvals.gfx, 0);
   } else {
      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
                                                              sizeof(cmdbuf->state.sysvals.compute) / 4,
                                                              &cmdbuf->state.sysvals.compute, 0);
   }
}

static void
dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
{
   const struct dzn_graphics_pipeline *pipeline =
      (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;

   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
       !pipeline->vp.count)
      return;

   ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
}

static void
dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
{
   const struct dzn_graphics_pipeline *pipeline =
      (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;

   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
      return;

   if (!pipeline->scissor.count) {
      /* Apply a scissor delimiting the render area. */
      ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
      return;
   }

   D3D12_RECT scissors[MAX_SCISSOR];

   memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
   for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
      scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
      scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
      scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
      scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
   }

   ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
}

static void
dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
{
   unsigned start, end;

   BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
      ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, &cmdbuf->state.vb.views[start]);

   BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
}

static void
dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
{
   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
      return;

   ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
}

static void
dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   struct dzn_cmd_buffer_push_constant_state *state =
      bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
      &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;

   uint32_t offset = state->offset / 4;
   uint32_t end = ALIGN(state->end, 4) / 4;
   uint32_t count = end - offset;
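
   /* The push-constant state tracks a dirty byte range [offset, end), while
    * SetGraphicsRoot32BitConstants()/SetComputeRoot32BitConstants() take a
    * count and a destination offset in 32-bit values, hence the divisions
    * by 4 above.
    */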

   if (!count)
      return;

   uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
   uint32_t *vals = state->values + offset;

   if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
      ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
   else
      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);

   state->offset = 0;
   state->end = 0;
}

static void
dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
{
   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
      const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
      uint32_t ref =
         gfx->zsa.stencil_test.front.uses_ref ?
         cmdbuf->state.zsa.stencil_test.front.ref :
         cmdbuf->state.zsa.stencil_test.back.ref;
      ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
   }
}

static void
dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
{
   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
      ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
                                                  cmdbuf->state.blend.constants);
}

static void
dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
{
   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
      ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
                                                  cmdbuf->state.zsa.depth_bounds.min,
                                                  cmdbuf->state.zsa.depth_bounds.max);
   }
}

static VkResult
dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
{
   uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
   uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;

   *vertex_count = triangle_count * 3;
   if (!*vertex_count)
      return VK_SUCCESS;

   ID3D12Resource *index_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
                                        D3D12_HEAP_TYPE_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        &index_buf);
   if (result != VK_SUCCESS)
      return result;

   void *cpu_ptr;
   ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);

   /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
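   /* Fan decomposition (first-vertex provoking): a fan over v0..vN-1
    * becomes triangles (v1, v2, v0), (v2, v3, v0), ..., (vN-2, vN-1, v0),
    * matching Vulkan's triangle-fan definition where the shared vertex
    * comes last; triangle t emits indices (t+1, t+2, 0).
    */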
   if (index_size == 2) {
      uint16_t *indices = (uint16_t *)cpu_ptr;
      for (uint32_t t = 0; t < triangle_count; t++) {
         indices[t * 3] = t + 1;
         indices[(t * 3) + 1] = t + 2;
         indices[(t * 3) + 2] = 0;
      }
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
   } else {
      uint32_t *indices = (uint32_t *)cpu_ptr;
      for (uint32_t t = 0; t < triangle_count; t++) {
         indices[t * 3] = t + 1;
         indices[(t * 3) + 1] = t + 2;
         indices[(t * 3) + 2] = 0;
      }
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
   }

   cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf);
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   return VK_SUCCESS;
}

static VkResult
dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
                                          uint32_t *index_count,
                                          uint32_t *first_index)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t triangle_count = MAX2(*index_count, 2) - 2;

   *index_count = triangle_count * 3;
   if (!*index_count)
      return VK_SUCCESS;

   /* The new index buffer is always 32-bit: it keeps the index-rewriting
    * compute shader simple.
    */
   ID3D12Resource *new_index_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
                                        D3D12_HEAP_TYPE_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        &new_index_buf);
   if (result != VK_SUCCESS)
      return result;

   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
      cmdbuf->state.ib.view.BufferLocation;

   ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   ASSERTED bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;

   assert(!prim_restart);

   enum dzn_index_type index_type =
      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
   const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
      &device->triangle_fan[index_type];

   struct dzn_triangle_fan_rewrite_index_params params = {
      .first_index = *first_index,
   };

   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
                                                           &params, 0);
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
                                            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                            D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   /* We don't touch the driver state when executing our internal compute
    * shader, but we do change the D3D12 state, so mark things dirty where
    * needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
   cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   *first_index = 0;
   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
{
   if (indexed)
      dzn_cmd_buffer_update_ibview(cmdbuf);

   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_viewports(cmdbuf);
   dzn_cmd_buffer_update_scissors(cmdbuf);
   dzn_cmd_buffer_update_vbviews(cmdbuf);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_zsa(cmdbuf);
   dzn_cmd_buffer_update_blend_constants(cmdbuf);
   dzn_cmd_buffer_update_depth_bounds(cmdbuf);

   /* Reset the dirty states */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0;
   cmdbuf->state.dirty = 0;
}

static uint32_t
dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
{
   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   if (!pipeline->ia.triangle_fan)
      return 0;

   uint32_t max_triangles;

   if (indexed) {
      uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
      uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;

      max_triangles = MAX2(max_indices, 2) - 2;
   } else {
      uint32_t max_vertex = 0;
      for (uint32_t i = 0; i < pipeline->vb.count; i++) {
         max_vertex =
            MAX2(max_vertex,
                 cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
      }

      max_triangles = MAX2(max_vertex, 2) - 2;
   }
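
   /* Worst case for the lowered fan: every vertex/index past the first two
    * starts a new triangle, and each triangle needs three indices.
    */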
   return max_triangles * 3;
}

static void
dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
                             ID3D12Resource *draw_buf,
                             size_t draw_buf_offset,
                             ID3D12Resource *count_buf,
                             size_t count_buf_offset,
                             uint32_t max_draw_count,
                             uint32_t draw_buf_stride,
                             bool indexed)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   uint32_t min_draw_buf_stride =
      indexed ?
      sizeof(struct dzn_indirect_indexed_draw_params) :
      sizeof(struct dzn_indirect_draw_params);
   bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;

   draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
   assert(draw_buf_stride >= min_draw_buf_stride);
   assert((draw_buf_stride & 3) == 0);
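
   /* Indirect draws go through a compute prepass that repacks the
    * application's draw parameters into an exec buffer consumed by
    * ExecuteIndirect(). Besides translating the parameters, the prepass is
    * what lets us feed per-draw sysvals (first_vertex/base_instance) to the
    * shaders and, for triangle fans, set up the index-rewrite dispatches.
    */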
   uint32_t triangle_fan_index_buf_stride =
      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
      sizeof(uint32_t);
   uint32_t exec_buf_stride =
      triangle_fan_index_buf_stride > 0 ?
      sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
      sizeof(struct dzn_indirect_draw_exec_params);
   uint32_t triangle_fan_exec_buf_stride =
      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
   uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
   uint32_t exec_buf_draw_offset = 0;

   /* We reserve the first slot for the draw_count value when an indirect
    * count is involved.
    */
   if (count_buf != NULL) {
      exec_buf_size += exec_buf_stride;
      exec_buf_draw_offset = exec_buf_stride;
   }
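
   /* Resulting exec_buf layout when an indirect count is involved:
    *
    *   [0, stride)          draw count (written by the prepass)
    *   [stride, 2 * stride) exec params for draw 0
    *   [2 * stride, ...)    exec params for draw 1, etc.
    *
    * ExecuteIndirect() then reads its arguments at exec_buf_draw_offset and
    * its count at offset 0 of the same buffer.
    */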
2737
2738 ID3D12Resource *exec_buf;
2739 VkResult result =
2740 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
2741 D3D12_HEAP_TYPE_DEFAULT,
2742 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2743 &exec_buf);
2744 if (result != VK_SUCCESS)
2745 return;
2746
2747 D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
2748 ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset;
2749 ID3D12Resource *triangle_fan_index_buf = NULL;
2750 ID3D12Resource *triangle_fan_exec_buf = NULL;
2751
2752 if (triangle_fan_index_buf_stride) {
2753 result =
2754 dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
2755 max_draw_count * triangle_fan_index_buf_stride,
2756 D3D12_HEAP_TYPE_DEFAULT,
2757 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2758 &triangle_fan_index_buf);
2759 if (result != VK_SUCCESS)
2760 return;
2761
2762 result =
2763 dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
2764 max_draw_count * triangle_fan_exec_buf_stride,
2765 D3D12_HEAP_TYPE_DEFAULT,
2766 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2767 &triangle_fan_exec_buf);
2768 if (result != VK_SUCCESS)
2769 return;
2770 }
2771
2772 struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
2773 .draw_buf_stride = draw_buf_stride,
2774 .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
2775 .triangle_fan_index_buf_start =
2776 triangle_fan_index_buf ?
2777 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
2778 .exec_buf_start =
2779 prim_restart ?
2780 ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
2781 };
2782 uint32_t params_size;
2783 if (triangle_fan_index_buf_stride > 0 && prim_restart)
2784 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
2785 else if (triangle_fan_index_buf_stride > 0)
2786 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
2787 else
2788 params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
2789
2790 enum dzn_indirect_draw_type draw_type;
2791
2792 if (indexed && triangle_fan_index_buf_stride > 0) {
2793 if (prim_restart && count_buf)
2794 draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
2795 else if (prim_restart && !count_buf)
2796 draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART;
2797 else if (!prim_restart && count_buf)
2798 draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
2799 else
2800 draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
2801 } else if (!indexed && triangle_fan_index_buf_stride > 0) {
2802 draw_type = count_buf ?
2803 DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN :
2804 DZN_INDIRECT_DRAW_TRIANGLE_FAN;
2805 } else if (indexed) {
2806 draw_type = count_buf ?
2807 DZN_INDIRECT_INDEXED_DRAW_COUNT :
2808 DZN_INDIRECT_INDEXED_DRAW;
2809 } else {
2810 draw_type = count_buf ? DZN_INDIRECT_DRAW_COUNT : DZN_INDIRECT_DRAW;
2811 }
2812
2813 struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
2814 uint32_t root_param_idx = 0;
2815
2816 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
2817 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
2818 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
2819 params_size / 4, (const void *)¶ms, 0);
2820 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
2821 draw_buf_gpu);
2822 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
2823 ID3D12Resource_GetGPUVirtualAddress(exec_buf));
2824 if (count_buf) {
2825 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
2826 root_param_idx++,
2827 ID3D12Resource_GetGPUVirtualAddress(count_buf) +
2828 count_buf_offset);
2829 }
2830
2831 if (triangle_fan_exec_buf) {
2832 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
2833 root_param_idx++,
2834 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
2835 }
2836
2837 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);
2838
2839 D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
2840
2841 if (triangle_fan_exec_buf) {
2842 enum dzn_index_type index_type =
2843 indexed ?
2844 dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) :
2845 DZN_NO_INDEX;
2846 struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
2847 &device->triangle_fan[index_type];
2848
2849 struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };
2850
2851 assert(rewrite_index->root_sig);
2852 assert(rewrite_index->pipeline_state);
2853 assert(rewrite_index->cmd_sig);
2854
2855 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
2856 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2857 D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
2858 DZN_QUEUE_TRANSITION_FLUSH);
2859
2860 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
2861 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
2862 root_param_idx = 0;
2863 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
2864 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
2865 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
2866 sizeof(rewrite_index_params) / 4,
2867 (const void *)&rewrite_index_params, 0);
2868
2869 if (indexed) {
2870 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
2871 root_param_idx++,
2872 cmdbuf->state.ib.view.BufferLocation);
2873 }
2874
2875 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
2876 max_draw_count, triangle_fan_exec_buf, 0,
2877 count_buf ? exec_buf : NULL, 0);
2878
2879 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
2880 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2881 D3D12_RESOURCE_STATE_INDEX_BUFFER,
2882 DZN_QUEUE_TRANSITION_FLUSH);
2883
2884 /* After our triangle-fan lowering the draw is indexed */
2885 indexed = true;
2886 ib_view = cmdbuf->state.ib.view;
2887 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
2888 cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
2889 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2890 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2891 }
2892
2893 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
2894 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2895 D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
2896 DZN_QUEUE_TRANSITION_FLUSH);
2897
2898 /* We don't mess up with the driver state when executing our internal
2899 * compute shader, but we still change the D3D12 state, so let's mark
2900 * things dirty if needed.
2901 */
2902 cmdbuf->state.pipeline = NULL;
2903 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
2904 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
2905 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2906 }
2907
2908 cmdbuf->state.sysvals.gfx.first_vertex = 0;
2909 cmdbuf->state.sysvals.gfx.base_instance = 0;
2910 cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;
2911 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
2912 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
2913
2914 dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);
2915
2916 /* Restore the old IB view if we modified it during the triangle fan lowering */
2917 if (ib_view.SizeInBytes) {
2918 cmdbuf->state.ib.view = ib_view;
2919 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2920 }
2921
2922 enum dzn_indirect_draw_cmd_sig_type cmd_sig_type =
2923 triangle_fan_index_buf_stride > 0 ?
2924 DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG :
2925 indexed ?
2926 DZN_INDIRECT_INDEXED_DRAW_CMD_SIG :
2927 DZN_INDIRECT_DRAW_CMD_SIG;
2928 ID3D12CommandSignature *cmdsig =
2929 dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type);
2930
2931 if (!cmdsig) {
2932 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2933 return;
2934 }
2935
2936 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
2937 max_draw_count,
2938 exec_buf, exec_buf_draw_offset,
2939 count_buf ? exec_buf : NULL, 0);
2940 }
2941
2942 static void
2943 dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
2944 {
2945 dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2946 dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2947 dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2948 dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2949
2950 /* Reset the dirty states */
2951 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0;
2952 }
2953
2954 VKAPI_ATTR void VKAPI_CALL
2955 dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
2956 const VkCopyBufferInfo2 *info)
2957 {
2958 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2959 VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
2960 VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
2961
2962 for (uint32_t i = 0; i < info->regionCount; i++) {
2963 const VkBufferCopy2 *region = info->pRegions + i;
2964
2965 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
2966 src_buffer->res, region->srcOffset,
2967 region->size);
2968 }
2969 }
2970
2971 VKAPI_ATTR void VKAPI_CALL
2972 dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
2973 const VkCopyBufferToImageInfo2 *info)
2974 {
2975 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2976
2977 for (uint32_t i = 0; i < info->regionCount; i++) {
2978 const VkBufferImageCopy2 *region = info->pRegions + i;
2979
2980 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
2981 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
2982 dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
2983 }
2984 }
2985 }
2986
2987 VKAPI_ATTR void VKAPI_CALL
2988 dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
2989 const VkCopyImageToBufferInfo2 *info)
2990 {
2991 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2992
2993 for (uint32_t i = 0; i < info->regionCount; i++) {
2994 const VkBufferImageCopy2 *region = info->pRegions + i;
2995
2996 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
2997 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
2998 dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
2999 }
3000 }
3001 }
3002
3003 VKAPI_ATTR void VKAPI_CALL
3004 dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
3005 const VkCopyImageInfo2 *info)
3006 {
3007 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3008 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3009 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3010 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3011
3012 assert(src->vk.samples == dst->vk.samples);
3013
3014 bool requires_temp_res = false;
3015
3016 for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
3017 const VkImageCopy2 *region = &info->pRegions[i];
3018
3019 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3020 assert(aspect & region->dstSubresource.aspectMask);
3021
3022 if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
3023 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
3024 src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
3025 dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
3026 requires_temp_res = true;
3027 break;
3028 }
3029 }
3030 }
3031
3032 bool use_blit = false;
3033 if (src->vk.samples > 1) {
3034 use_blit = requires_temp_res;
3035
3036 for (uint32_t i = 0; i < info->regionCount; i++) {
3037 const VkImageCopy2 *region = info->pRegions + i;
3038 if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
3039 region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
3040 region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
3041 region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
3042 region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
3043 region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
3044 use_blit = true;
3045 }
3046 }
3047
3048 if (use_blit) {
3049 /* This copy -> blit lowering doesn't work if vkCmdCopyImage[2]() is
3050 * issued on a transfer queue, but we don't have any better option
3051 * right now...
3052 */
3053 STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
3054
3055 VkBlitImageInfo2 blit_info = {
3056 .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
3057 .srcImage = info->srcImage,
3058 .srcImageLayout = info->srcImageLayout,
3059 .dstImage = info->dstImage,
3060 .dstImageLayout = info->dstImageLayout,
3061 .regionCount = info->regionCount,
3062 .pRegions = blit_regions,
3063 .filter = VK_FILTER_NEAREST,
3064 };
3065
3066 for (uint32_t r = 0; r < info->regionCount; r++) {
3067 blit_regions[r] = (VkImageBlit2) {
3068 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3069 .srcSubresource = info->pRegions[r].srcSubresource,
3070 .srcOffsets = {
3071 info->pRegions[r].srcOffset,
3072 info->pRegions[r].srcOffset,
3073 },
3074 .dstSubresource = info->pRegions[r].dstSubresource,
3075 .dstOffsets = {
3076 info->pRegions[r].dstOffset,
3077 info->pRegions[r].dstOffset,
3078 },
3079 };
3080
3081 blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
3082 blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
3083 blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
3084 blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
3085 blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
3086 blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
3087 }
3088
3089 dzn_CmdBlitImage2(commandBuffer, &blit_info);
3090
3091 STACK_ARRAY_FINISH(blit_regions);
3092 return;
3093 }
3094
3095 D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
3096 D3D12_RESOURCE_DESC tmp_desc = {
3097 .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
3098 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
3099 .DepthOrArraySize = 1,
3100 .MipLevels = 1,
3101 .Format = src->desc.Format,
3102 .SampleDesc = { .Count = 1, .Quality = 0 },
3103 .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
3104 .Flags = D3D12_RESOURCE_FLAG_NONE,
3105 };
3106
3107 if (requires_temp_res) {
3108 ID3D12Device2 *dev = device->dev;
3109 VkImageAspectFlags aspect = 0;
3110 uint64_t max_size = 0;
3111
3112 if (vk_format_has_depth(src->vk.format))
3113 aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
3114 else if (vk_format_has_stencil(src->vk.format))
3115 aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
3116 else
3117 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
3118
3119 for (uint32_t i = 0; i < info->regionCount; i++) {
3120 const VkImageCopy2 *region = &info->pRegions[i];
3121 uint64_t region_size = 0;
3122
3123 tmp_desc.Format =
3124 dzn_image_get_dxgi_format(src->vk.format,
3125 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
3126 aspect);
3127 tmp_desc.Width = region->extent.width;
3128 tmp_desc.Height = region->extent.height;
3129
3130 ID3D12Device1_GetCopyableFootprints(dev, &tmp_desc,
3131 0, 1, 0,
3132 NULL, NULL, NULL,
3133 &region_size);
3134 max_size = MAX2(max_size, region_size * region->extent.depth);
3135 }
3136
3137 VkResult result =
3138 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
3139 D3D12_HEAP_TYPE_DEFAULT,
3140 D3D12_RESOURCE_STATE_COPY_DEST,
3141 &tmp_loc.pResource);
3142 if (result != VK_SUCCESS)
3143 return;
3144
3145 tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
3146 }
3147
3148 for (uint32_t i = 0; i < info->regionCount; i++) {
3149 const VkImageCopy2 *region = &info->pRegions[i];
3150
3151 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3152 for (uint32_t l = 0; l < region->srcSubresource.layerCount; l++)
3153 dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
3154 }
3155 }
3156 }
3157
3158 VKAPI_ATTR void VKAPI_CALL
3159 dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
3160 const VkBlitImageInfo2 *info)
3161 {
3162 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3163 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3164
3165 if (info->regionCount == 0)
3166 return;
3167
3168 uint32_t desc_count = 0;
3169 for (uint32_t r = 0; r < info->regionCount; r++)
3170 desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
3171
3172 struct dzn_descriptor_heap *heap;
3173 uint32_t heap_slot;
3174 VkResult result =
3175 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
3176 desc_count, &heap, &heap_slot);
3177
3178 if (result != VK_SUCCESS) {
3179 cmdbuf->error = result;
3180 return;
3181 }
3182
3183 if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
3184 ID3D12DescriptorHeap * const heaps[] = { heap->heap };
3185 cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
3186 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
3187 }
3188
3189 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
3190
3191 for (uint32_t r = 0; r < info->regionCount; r++)
3192 dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
3193
3194 cmdbuf->state.pipeline = NULL;
3195 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
3196 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
3197 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3198 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3199 }
3200 }
3201
3202 VKAPI_ATTR void VKAPI_CALL
3203 dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
3204 const VkResolveImageInfo2 *info)
3205 {
3206 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3207 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3208
3209 if (info->regionCount == 0)
3210 return;
3211
3212 uint32_t desc_count = 0;
3213 for (uint32_t r = 0; r < info->regionCount; r++)
3214 desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
3215
3216 struct dzn_descriptor_heap *heap;
3217 uint32_t heap_slot;
3218 VkResult result =
3219 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
3220 desc_count, &heap, &heap_slot);
3221 if (result != VK_SUCCESS) {
3222 cmdbuf->error = result;
3223 return;
3224 }
3225
3226 if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
3227 ID3D12DescriptorHeap * const heaps[] = { heap->heap };
3228 cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
3229 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
3230 }
3231
3232 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
3233
3234 uint32_t heap_offset = 0;
3235 for (uint32_t r = 0; r < info->regionCount; r++)
3236 dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_offset, r);
3237
3238 cmdbuf->state.pipeline = NULL;
3239 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
3240 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
3241 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3242 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3243 }
3244 }
3245
3246 VKAPI_ATTR void VKAPI_CALL
3247 dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
3248 VkImage image,
3249 VkImageLayout imageLayout,
3250 const VkClearColorValue *pColor,
3251 uint32_t rangeCount,
3252 const VkImageSubresourceRange *pRanges)
3253 {
3254 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3255 VK_FROM_HANDLE(dzn_image, img, image);
3256
3257 dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
3258 }
3259
3260 VKAPI_ATTR void VKAPI_CALL
3261 dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
3262 VkImage image,
3263 VkImageLayout imageLayout,
3264 const VkClearDepthStencilValue *pDepthStencil,
3265 uint32_t rangeCount,
3266 const VkImageSubresourceRange *pRanges)
3267 {
3268 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3269 VK_FROM_HANDLE(dzn_image, img, image);
3270
3271 dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
3272 }
3273
3274 VKAPI_ATTR void VKAPI_CALL
3275 dzn_CmdDispatch(VkCommandBuffer commandBuffer,
3276 uint32_t groupCountX,
3277 uint32_t groupCountY,
3278 uint32_t groupCountZ)
3279 {
3280 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3281
3282 cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
3283 cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
3284 cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
3285 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
3286 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3287
3288 dzn_cmd_buffer_prepare_dispatch(cmdbuf);
3289 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
3290 }
3291
3292 VKAPI_ATTR void VKAPI_CALL
3293 dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
3294 VkBuffer dstBuffer,
3295 VkDeviceSize dstOffset,
3296 VkDeviceSize size,
3297 uint32_t data)
3298 {
3299 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3300 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
3301
3302 if (size == VK_WHOLE_SIZE)
3303 size = buf->size - dstOffset;
3304
3305 size &= ~3ULL;
3306
3307 ID3D12Resource *src_res;
3308 VkResult result =
3309 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
3310 D3D12_HEAP_TYPE_UPLOAD,
3311 D3D12_RESOURCE_STATE_GENERIC_READ,
3312 &src_res);
3313 if (result != VK_SUCCESS)
3314 return;
3315
3316 uint32_t *cpu_ptr;
3317 ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
3318 for (uint32_t i = 0; i < size / 4; i++)
3319 cpu_ptr[i] = data;
3320
3321 ID3D12Resource_Unmap(src_res, 0, NULL);
3322
3323 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
3324 }
3325
3326 VKAPI_ATTR void VKAPI_CALL
3327 dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
3328 VkBuffer dstBuffer,
3329 VkDeviceSize dstOffset,
3330 VkDeviceSize size,
3331 const void *data)
3332 {
3333 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3334 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
3335
3336 if (size == VK_WHOLE_SIZE)
3337 size = buf->size - dstOffset;
3338
3339 /*
3340 * The spec says:
3341 * "4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
3342 * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
3343 * is not a multiple of 4, then the nearest smaller multiple is used."
3344 */
3345 size &= ~3ULL;
3346
3347 ID3D12Resource *src_res;
3348 VkResult result =
3349 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
3350 D3D12_HEAP_TYPE_UPLOAD,
3351 D3D12_RESOURCE_STATE_GENERIC_READ,
3352 &src_res);
3353 if (result != VK_SUCCESS)
3354 return;
3355
3356 void *cpu_ptr;
3357 ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
3358 memcpy(cpu_ptr, data, size);
3359 ID3D12Resource_Unmap(src_res, 0, NULL);
3360
3361 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
3362 }
3363
3364 VKAPI_ATTR void VKAPI_CALL
3365 dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
3366 uint32_t attachmentCount,
3367 const VkClearAttachment *pAttachments,
3368 uint32_t rectCount,
3369 const VkClearRect *pRects)
3370 {
3371 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3372
3373 for (unsigned i = 0; i < attachmentCount; i++) {
3374 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
3375 struct dzn_image_view *view = NULL;
3376
3377 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
3378 assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
3379 view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
3380 layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
3381 } else {
3382 if (cmdbuf->state.render.attachments.depth.iview &&
3383 (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
3384 view = cmdbuf->state.render.attachments.depth.iview;
3385 layout = cmdbuf->state.render.attachments.depth.layout;
3386 }
3387
3388 if (cmdbuf->state.render.attachments.stencil.iview &&
3389 (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
3390 assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
3391 view = cmdbuf->state.render.attachments.stencil.iview;
3392 layout = cmdbuf->state.render.attachments.stencil.layout;
3393 }
3394 }
3395
3396 if (!view)
3397 continue;
3398
3399 for (uint32_t j = 0; j < rectCount; j++) {
3400 D3D12_RECT rect;
3401
3402 dzn_translate_rect(&rect, &pRects[j].rect);
3403 dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
3404 &pAttachments[i].clearValue,
3405 pAttachments[i].aspectMask,
3406 pRects[j].baseArrayLayer,
3407 pRects[j].layerCount,
3408 1, &rect);
3409 }
3410 }
3411 }
3412
3413 static void
3414 dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
3415 const struct dzn_rendering_attachment *att,
3416 VkImageAspectFlagBits aspect)
3417 {
3418 struct dzn_image_view *src = att->iview;
3419 struct dzn_image_view *dst = att->resolve.iview;
3420
3421 if (!src || !dst)
3422 return;
3423
3424 VkImageLayout src_layout = att->layout;
3425 VkImageLayout dst_layout = att->resolve.layout;
3426 struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
3427 D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect);
3428 struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);
3429 D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect);
3430
3431 VkImageSubresourceRange src_range = {
3432 .aspectMask = (VkImageAspectFlags)aspect,
3433 .baseMipLevel = src->vk.base_mip_level,
3434 .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
3435 .baseArrayLayer = src->vk.base_array_layer,
3436 .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
3437 };
3438
3439 VkImageSubresourceRange dst_range = {
3440 .aspectMask = (VkImageAspectFlags)aspect,
3441 .baseMipLevel = dst->vk.base_mip_level,
3442 .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
3443 .baseArrayLayer = dst->vk.base_array_layer,
3444 .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
3445 };
3446
3447 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
3448 src_state,
3449 D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
3450 DZN_QUEUE_TRANSITION_FLUSH);
3451 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
3452 dst_state,
3453 D3D12_RESOURCE_STATE_RESOLVE_DEST,
3454 DZN_QUEUE_TRANSITION_FLUSH);
3455
3456 for (uint32_t level = 0; level < src_range.levelCount; level++) {
3457 for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
3458 uint32_t src_subres =
3459 dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
3460 uint32_t dst_subres =
3461 dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);
3462
3463 ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
3464 dst_img->res, dst_subres,
3465 src_img->res, src_subres,
3466 dst->srv_desc.Format);
3467 }
3468 }
3469
3470 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
3471 D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
3472 src_state,
3473 DZN_QUEUE_TRANSITION_FLUSH);
3474 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
3475 D3D12_RESOURCE_STATE_RESOLVE_DEST,
3476 dst_state,
3477 DZN_QUEUE_TRANSITION_FLUSH);
3478 }
3479
3480 static void
3481 dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
3482 const VkRenderingAttachmentInfo *att,
3483 VkImageAspectFlagBits aspect)
3484 {
3485 const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
3486 vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
3487 VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3488
3489 if (!initial_layout || !iview)
3490 return;
3491
3492 struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
3493 const VkImageSubresourceRange range = {
3494 .aspectMask = aspect,
3495 .baseMipLevel = iview->vk.base_mip_level,
3496 .levelCount = iview->vk.level_count,
3497 .baseArrayLayer = iview->vk.base_array_layer,
3498 .layerCount = iview->vk.layer_count,
3499 };
3500
3501 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
3502 initial_layout->initialLayout,
3503 att->imageLayout,
3504 DZN_QUEUE_TRANSITION_FLUSH);
3505 }
3506
3507 VKAPI_ATTR void VKAPI_CALL
3508 dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
3509 const VkRenderingInfo *pRenderingInfo)
3510 {
3511 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3512
3513 D3D12_RECT new_render_area = {
3514 .left = pRenderingInfo->renderArea.offset.x,
3515 .top = pRenderingInfo->renderArea.offset.y,
3516 .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
3517 .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
3518 };
3519
3520 // The render area has an impact on the scissor state.
3521 if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
3522 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3523 cmdbuf->state.render.area = new_render_area;
3524 }
3525
3526 cmdbuf->state.render.flags = pRenderingInfo->flags;
3527 cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
3528 cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;
3529
3530 D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
3531 D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
3532
3533 cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
3534 for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
3535 const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
3536 VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3537
3538 cmdbuf->state.render.attachments.colors[i].iview = iview;
3539 cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
3540 cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
3541 cmdbuf->state.render.attachments.colors[i].resolve.iview =
3542 dzn_image_view_from_handle(att->resolveImageView);
3543 cmdbuf->state.render.attachments.colors[i].resolve.layout =
3544 att->resolveImageLayout;
3545 cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;
3546
3547 if (!iview) {
3548 rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
3549 continue;
3550 }
3551
3552 struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
3553 rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
3554 dzn_rendering_attachment_initial_transition(cmdbuf, att,
3555 VK_IMAGE_ASPECT_COLOR_BIT);
3556 }
3557
3558 if (pRenderingInfo->pDepthAttachment) {
3559 const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
3560
3561 cmdbuf->state.render.attachments.depth.iview =
3562 dzn_image_view_from_handle(att->imageView);
3563 cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
3564 cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
3565 cmdbuf->state.render.attachments.depth.resolve.iview =
3566 dzn_image_view_from_handle(att->resolveImageView);
3567 cmdbuf->state.render.attachments.depth.resolve.layout =
3568 att->resolveImageLayout;
3569 cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
3570 dzn_rendering_attachment_initial_transition(cmdbuf, att,
3571 VK_IMAGE_ASPECT_DEPTH_BIT);
3572 }
3573
3574 if (pRenderingInfo->pStencilAttachment) {
3575 const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
3576
3577 cmdbuf->state.render.attachments.stencil.iview =
3578 dzn_image_view_from_handle(att->imageView);
3579 cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
3580 cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
3581 cmdbuf->state.render.attachments.stencil.resolve.iview =
3582 dzn_image_view_from_handle(att->resolveImageView);
3583 cmdbuf->state.render.attachments.stencil.resolve.layout =
3584 att->resolveImageLayout;
3585 cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
3586 dzn_rendering_attachment_initial_transition(cmdbuf, att,
3587 VK_IMAGE_ASPECT_STENCIL_BIT);
3588 }
3589
3590 if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
3591 struct dzn_image_view *z_iview =
3592 pRenderingInfo->pDepthAttachment ?
3593 dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
3594 NULL;
3595 struct dzn_image_view *s_iview =
3596 pRenderingInfo->pStencilAttachment ?
3597 dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
3598 NULL;
3599 struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
3600 assert(!z_iview || !s_iview || z_iview == s_iview);
3601
3602 if (iview) {
3603 struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
3604
3605 zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
3606 }
3607 }
3608
3609 ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
3610 pRenderingInfo->colorAttachmentCount,
3611 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
3612 FALSE, zs_handle.ptr ? &zs_handle : NULL);
3613
3614 for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
3615 const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
3616 VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3617
3618 if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3619 dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
3620 &att->clearValue,
3621 VK_IMAGE_ASPECT_COLOR_BIT, 0,
3622 VK_REMAINING_ARRAY_LAYERS, 1,
3623 &cmdbuf->state.render.area);
3624 }
3625 }
3626
3627 if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
3628 const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
3629 const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
3630 struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
3631 struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
3632 struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
3633 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
3634
3635 assert(!z_iview || !s_iview || z_iview == s_iview);
3636
3637 VkImageAspectFlags aspects = 0;
3638 VkClearValue clear_val;
3639
3640 if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3641 aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
3642 clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
3643 layout = z_att->imageLayout;
3644 }
3645
3646 if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3647 aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
3648 clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
3649 layout = s_att->imageLayout;
3650 }
3651
3652 if (aspects != 0) {
3653 dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
3654 &clear_val, aspects, 0,
3655 VK_REMAINING_ARRAY_LAYERS, 1,
3656 &cmdbuf->state.render.area);
3657 }
3658 }
3659 }
3660
3661 VKAPI_ATTR void VKAPI_CALL
3662 dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
3663 {
3664 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3665
3666 for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
3667 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3668 &cmdbuf->state.render.attachments.colors[i],
3669 VK_IMAGE_ASPECT_COLOR_BIT);
3670 }
3671
3672 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3673 &cmdbuf->state.render.attachments.depth,
3674 VK_IMAGE_ASPECT_DEPTH_BIT);
3675 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3676 &cmdbuf->state.render.attachments.stencil,
3677 VK_IMAGE_ASPECT_STENCIL_BIT);
3678
3679 memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
3680 }
3681
3682 VKAPI_ATTR void VKAPI_CALL
3683 dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
3684 VkPipelineBindPoint pipelineBindPoint,
3685 VkPipeline pipe)
3686 {
3687 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3688 VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
3689
3690 cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
3691 cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3692 if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3693 const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;
3694
3695 if (!gfx->vp.dynamic) {
3696 memcpy(cmdbuf->state.viewports, gfx->vp.desc,
3697 gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
3698 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
3699 }
3700
3701 if (!gfx->scissor.dynamic) {
3702 memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
3703 gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
3704 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3705 }
3706
3707 if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
3708 cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
3709 cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
3710 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3711 }
3712
3713 if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
3714 cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
3715 cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
3716 cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
3717 }
3718
3719 if (!gfx->blend.dynamic_constants) {
3720 memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
3721 sizeof(cmdbuf->state.blend.constants));
3722 cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
3723 }
3724
3725 for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
3726 cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
3727
3728 if (gfx->vb.count > 0)
3729 BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
3730 }
3731 }
3732
3733 VKAPI_ATTR void VKAPI_CALL
3734 dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
3735 VkPipelineBindPoint pipelineBindPoint,
3736 VkPipelineLayout layout,
3737 uint32_t firstSet,
3738 uint32_t descriptorSetCount,
3739 const VkDescriptorSet *pDescriptorSets,
3740 uint32_t dynamicOffsetCount,
3741 const uint32_t *pDynamicOffsets)
3742 {
3743 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3744 VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
3745
3746 struct dzn_descriptor_state *desc_state =
3747 &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
3748 uint32_t dirty = 0;
3749
3750 for (uint32_t i = 0; i < descriptorSetCount; i++) {
3751 uint32_t idx = firstSet + i;
3752 VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
3753
3754 if (desc_state->sets[idx].set != set) {
3755 desc_state->sets[idx].set = set;
3756 dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3757 }
3758
3759 uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
3760 if (dynamic_buffer_count) {
3761 assert(dynamicOffsetCount >= dynamic_buffer_count);
3762
3763 for (uint32_t j = 0; j < dynamic_buffer_count; j++)
3764 desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
3765
3766 dynamicOffsetCount -= dynamic_buffer_count;
3767 pDynamicOffsets += dynamic_buffer_count;
3768 dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3769 }
3770 }
3771
3772 cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
3773 }
3774
3775 VKAPI_ATTR void VKAPI_CALL
3776 dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
3777 uint32_t firstViewport,
3778 uint32_t viewportCount,
3779 const VkViewport *pViewports)
3780 {
3781 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3782
3783 STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
3784
3785 for (uint32_t i = 0; i < viewportCount; i++) {
3786 uint32_t vp = i + firstViewport;
3787
3788 dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
3789
3790 if (pViewports[i].minDepth > pViewports[i].maxDepth)
3791 cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
3792 else
3793 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
3794
3795 if (pViewports[i].height > 0)
3796 cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
3797 else
3798 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
3799 }
3800
3801 if (viewportCount) {
3802 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
3803 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3804 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3805 }
3806 }
3807
3808 VKAPI_ATTR void VKAPI_CALL
3809 dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
3810 uint32_t firstScissor,
3811 uint32_t scissorCount,
3812 const VkRect2D *pScissors)
3813 {
3814 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3815
3816 for (uint32_t i = 0; i < scissorCount; i++)
3817 dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
3818
3819 if (scissorCount)
3820 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3821 }
3822
3823 VKAPI_ATTR void VKAPI_CALL
3824 dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
3825 VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
3826 const void *pValues)
3827 {
3828 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3829 struct dzn_cmd_buffer_push_constant_state *states[2];
3830 uint32_t num_states = 0;
3831
3832 if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
3833 states[num_states++] = &cmdbuf->state.push_constant.gfx;
3834
3835 if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
3836 states[num_states++] = &cmdbuf->state.push_constant.compute;
3837
3838 for (uint32_t i = 0; i < num_states; i++) {
3839 memcpy(((char *)states[i]->values) + offset, pValues, size);
3840 states[i]->offset =
3841 states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
3842 states[i]->end = MAX2(states[i]->end, offset + size);
3843 }
3844 }
3845
3846 VKAPI_ATTR void VKAPI_CALL
3847 dzn_CmdDraw(VkCommandBuffer commandBuffer,
3848 uint32_t vertexCount,
3849 uint32_t instanceCount,
3850 uint32_t firstVertex,
3851 uint32_t firstInstance)
3852 {
3853 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3854
3855 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
3856 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3857
3858 cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
3859 cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
3860 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3861 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3862
3863 if (pipeline->ia.triangle_fan) {
3864 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
3865
3866 VkResult result =
3867 dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
3868 if (result != VK_SUCCESS || !vertexCount)
3869 return;
3870
3871 cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
3872 dzn_cmd_buffer_prepare_draw(cmdbuf, true);
3873 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
3874 firstVertex, firstInstance);
3875
3876 /* Restore the IB view if we modified it when lowering triangle fans. */
3877 if (ib_view.SizeInBytes > 0) {
3878 cmdbuf->state.ib.view = ib_view;
3879 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3880 }
3881 } else {
3882 cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
3883 dzn_cmd_buffer_prepare_draw(cmdbuf, false);
3884 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
3885 firstVertex, firstInstance);
3886 }
3887 }
3888
3889 VKAPI_ATTR void VKAPI_CALL
3890 dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
3891 uint32_t indexCount,
3892 uint32_t instanceCount,
3893 uint32_t firstIndex,
3894 int32_t vertexOffset,
3895 uint32_t firstInstance)
3896 {
3897 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3898
3899 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
3900 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3901
3902 if (pipeline->ia.triangle_fan &&
3903 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
3904 /* The indexed+primitive-restart+triangle-fan combination is a mess,
3905 * since we have to walk the index buffer, skip entries with the
3906 * special 0xffff/0xffffffff values, and push triangle list indices
3907 * for the remaining values. All of this has an impact on the index
3908 * count passed to the draw call, which forces us to use the indirect
3909 * path.
3910 */
3911 struct dzn_indirect_indexed_draw_params params = {
3912 .index_count = indexCount,
3913 .instance_count = instanceCount,
3914 .first_index = firstIndex,
3915 .vertex_offset = vertexOffset,
3916 .first_instance = firstInstance,
3917 };
3918
3919 ID3D12Resource *draw_buf;
3920 VkResult result =
3921 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
3922 D3D12_HEAP_TYPE_UPLOAD,
3923 D3D12_RESOURCE_STATE_GENERIC_READ,
3924 &draw_buf);
3925 if (result != VK_SUCCESS)
3926 return;
3927
3928 void *cpu_ptr;
3929 ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
3930 memcpy(cpu_ptr, &params, sizeof(params));
3931
3932 ID3D12Resource_Unmap(draw_buf, 0, NULL);
3933
3934 dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, 0, NULL, 0, 1, sizeof(params), true);
3935 return;
3936 }
3937
3938 cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
3939 cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
3940 cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
3941 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3942 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3943
3944 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
3945
3946 if (pipeline->ia.triangle_fan) {
3947 VkResult result =
3948 dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
3949 if (result != VK_SUCCESS || !indexCount)
3950 return;
3951 }
3952
3953 dzn_cmd_buffer_prepare_draw(cmdbuf, true);
3954 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
3955 vertexOffset, firstInstance);
3956
3957 /* Restore the IB view if we modified it when lowering triangle fans. */
3958 if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
3959 cmdbuf->state.ib.view = ib_view;
3960 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3961 }
3962 }
3963
3964 VKAPI_ATTR void VKAPI_CALL
3965 dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
3966 VkBuffer buffer,
3967 VkDeviceSize offset,
3968 uint32_t drawCount,
3969 uint32_t stride)
3970 {
3971 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3972 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
3973
3974 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
3975 }
3976
3977 VKAPI_ATTR void VKAPI_CALL
3978 dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
3979 VkBuffer buffer,
3980 VkDeviceSize offset,
3981 uint32_t drawCount,
3982 uint32_t stride)
3983 {
3984 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3985 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
3986
3987 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
3988 }
3989
3990 VKAPI_ATTR void VKAPI_CALL
3991 dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
3992 VkBuffer buffer,
3993 VkDeviceSize offset,
3994 VkBuffer countBuffer,
3995 VkDeviceSize countBufferOffset,
3996 uint32_t maxDrawCount,
3997 uint32_t stride)
3998 {
3999 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4000 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4001 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
4002
4003 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
4004 count_buf->res, countBufferOffset,
4005 maxDrawCount, stride, false);
4006 }
4007
4008 VKAPI_ATTR void VKAPI_CALL
4009 dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
4010 VkBuffer buffer,
4011 VkDeviceSize offset,
4012 VkBuffer countBuffer,
4013 VkDeviceSize countBufferOffset,
4014 uint32_t maxDrawCount,
4015 uint32_t stride)
4016 {
4017 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4018 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4019 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
4020
4021 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
4022 count_buf->res, countBufferOffset,
4023 maxDrawCount, stride, true);
4024 }
4025
4026 VKAPI_ATTR void VKAPI_CALL
4027 dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
4028 uint32_t firstBinding,
4029 uint32_t bindingCount,
4030 const VkBuffer *pBuffers,
4031 const VkDeviceSize *pOffsets)
4032 {
4033 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4034
4035 if (!bindingCount)
4036 return;
4037
4038 D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
4039
4040 for (uint32_t i = 0; i < bindingCount; i++) {
4041 VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
4042
4043 vbviews[firstBinding + i].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + pOffsets[i];
4044 vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
4045 }
4046
4047 BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
4048 firstBinding + bindingCount - 1);
4049 }
4050
4051 VKAPI_ATTR void VKAPI_CALL
4052 dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
4053 VkBuffer buffer,
4054 VkDeviceSize offset,
4055 VkIndexType indexType)
4056 {
4057 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4058 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4059
4060 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + offset;
4061 cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
4062 switch (indexType) {
4063 case VK_INDEX_TYPE_UINT16:
4064 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
4065 cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
4066 break;
4067 case VK_INDEX_TYPE_UINT32:
4068 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
4069 cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
4070 break;
4071 default: unreachable("Invalid index type");
4072 }
4073
4074 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
4075
4076 const struct dzn_graphics_pipeline *pipeline =
4077 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
4078
4079 if (pipeline && dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
4080 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4081 }
4082
4083 VKAPI_ATTR void VKAPI_CALL
4084 dzn_CmdResetEvent(VkCommandBuffer commandBuffer,
4085 VkEvent event,
4086 VkPipelineStageFlags stageMask)
4087 {
4088 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4089 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4090 VK_FROM_HANDLE(dzn_event, evt, event);
4091
4092 if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
4093 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4094 }
4095
4096 VKAPI_ATTR void VKAPI_CALL
4097 dzn_CmdSetEvent(VkCommandBuffer commandBuffer,
4098 VkEvent event,
4099 VkPipelineStageFlags stageMask)
4100 {
4101 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4102 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4103 VK_FROM_HANDLE(dzn_event, evt, event);
4104
4105 if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
4106 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4107 }
4108
4109 VKAPI_ATTR void VKAPI_CALL
4110 dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,
4111 uint32_t eventCount,
4112 const VkEvent *pEvents,
4113 VkPipelineStageFlags srcStageMask,
4114 VkPipelineStageFlags dstStageMask,
4115 uint32_t memoryBarrierCount,
4116 const VkMemoryBarrier *pMemoryBarriers,
4117 uint32_t bufferMemoryBarrierCount,
4118 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
4119 uint32_t imageMemoryBarrierCount,
4120 const VkImageMemoryBarrier *pImageMemoryBarriers)
4121 {
4122 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4123 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4124
4125 /* An intra-command-list wait is handled by this pipeline flush, which is
4126 * overkill, but that's the best we can do with the standard D3D12 barrier
4127 * API.
4128 *
4129 * Inter-command-list waits are taken care of by the serialization done at
4130 * the ExecuteCommandLists() level:
4131 * "Calling ExecuteCommandLists twice in succession (from the same thread,
4132 * or different threads) guarantees that the first workload (A) finishes
4133 * before the second workload (B)"
4134 *
4135 * HOST -> DEVICE signaling is ignored, and we assume events are always
4136 * signaled when we reach the vkCmdWaitEvents() point:
4137 * "Command buffers in the submission can include vkCmdWaitEvents commands
4138 * that wait on events that will not be signaled by earlier commands in the
4139 * queue. Such events must be signaled by the application using vkSetEvent,
4140 * and the vkCmdWaitEvents commands that wait upon them must not be inside
4141 * a render pass instance.
4142 * The event must be set before the vkCmdWaitEvents command is executed."
4143 */
4144 bool flush_pipeline = false;
4145
4146 for (uint32_t i = 0; i < eventCount; i++) {
4147 VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
4148
4149 struct hash_entry *he =
4150 _mesa_hash_table_search(cmdbuf->events.ht, event);
4151 if (he) {
4152 enum dzn_event_state state = (uintptr_t)he->data;
4153 assert(state != DZN_EVENT_STATE_RESET);
4154 flush_pipeline = state == DZN_EVENT_STATE_SET;
4155 } else {
4156 if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
4157 (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
4158 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4159 return;
4160 }
4161
4162 struct dzn_event **entry =
4163 util_dynarray_grow(&cmdbuf->events.wait, struct dzn_event *, 1);
4164
4165 if (!entry) {
4166 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4167 return;
4168 }
4169
4170 *entry = event;
4171 }
4172 }
4173
4174 if (flush_pipeline) {
4175 D3D12_RESOURCE_BARRIER barrier = {
4176 .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
4177 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
4178 .UAV = { .pResource = NULL },
4179 };
4180
4181 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
4182 }
4183 }
4184
4185 VKAPI_ATTR void VKAPI_CALL
4186 dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
4187 VkQueryPool queryPool,
4188 uint32_t query,
4189 VkQueryControlFlags flags)
4190 {
4191 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4192 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4193
4194 struct dzn_cmd_buffer_query_pool_state *state =
4195 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4196 if (!state)
4197 return;
4198
4199 qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
4200 dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
4201 ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4202 }
4203
4204 VKAPI_ATTR void VKAPI_CALL
4205 dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
4206 VkQueryPool queryPool,
4207 uint32_t query)
4208 {
4209 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4210 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4211
4212 struct dzn_cmd_buffer_query_pool_state *state =
4213 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4214 if (!state)
4215 return;
4216
4217 dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
4218 ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4219 }
4220
4221 VKAPI_ATTR void VKAPI_CALL
4222 dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
4223 VkPipelineStageFlags2 stage,
4224 VkQueryPool queryPool,
4225 uint32_t query)
4226 {
4227 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4228 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4229
4230 struct dzn_cmd_buffer_query_pool_state *state =
4231 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4232 if (!state)
4233 return;
4234
4235 /* Execution barrier so the timestamp gets written after the pipeline flush. */
4236 D3D12_RESOURCE_BARRIER barrier = {
4237 .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
4238 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
4239 .UAV = { .pResource = NULL },
4240 };
4241
4242 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
4243
4244 qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP;
4245 dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
4246 ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4247 }
4248
4249
4250 VKAPI_ATTR void VKAPI_CALL
4251 dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
4252 VkQueryPool queryPool,
4253 uint32_t firstQuery,
4254 uint32_t queryCount)
4255 {
4256 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4257 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4258 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4259
4260 struct dzn_cmd_buffer_query_pool_state *state =
4261 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4262
4263 if (!state)
4264 return;
4265
4266 uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
4267
4268 for (uint32_t q = 0; q < queryCount; q += q_step) {
4269 uint32_t q_count = MIN2(queryCount - q, q_step);
4270
4271 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
4272 dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
4273 device->queries.refs,
4274 DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
4275 q_count * sizeof(uint64_t));
4276 }
4277
4278 q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
4279
4280 for (uint32_t q = 0; q < queryCount; q += q_step) {
4281 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
4282 dzn_query_pool_get_result_offset(qpool, firstQuery + q),
4283 device->queries.refs,
4284 DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
4285 MIN2(queryCount - q, q_step) * qpool->query_size);
4286 }
4287
4288 dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
4289 dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
4290 }
4291
4292 VKAPI_ATTR void VKAPI_CALL
4293 dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
4294 VkQueryPool queryPool,
4295 uint32_t firstQuery,
4296 uint32_t queryCount,
4297 VkBuffer dstBuffer,
4298 VkDeviceSize dstOffset,
4299 VkDeviceSize stride,
4300 VkQueryResultFlags flags)
4301 {
4302 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4303 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4304 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4305
4306 struct dzn_cmd_buffer_query_pool_state *qpstate =
4307 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4308 if (!qpstate)
4309 return;
4310
   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      for (uint32_t i = 0; i < queryCount; i++) {
         if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) &&
             !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i))
            dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i);
      }
   }

   VkResult result =
      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
   if (result != VK_SUCCESS)
      return;

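   /* A single bulk copy is possible when the caller wants tightly packed
    * 64-bit results with no availability data. Pipeline-statistics pools
    * additionally need every counter enabled, because D3D12 always resolves
    * the full D3D12_QUERY_DATA_PIPELINE_STATISTICS struct.
    */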
   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
                   stride == qpool->query_size &&
                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
#define ALL_STATS \
   (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
   if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
       qpool->pipeline_statistics != ALL_STATS)
      raw_copy = false;
#undef ALL_STATS

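   /* The collect buffer rests in COPY_DEST (the state ResolveQueryData and
    * the reset copies require); move it to COPY_SOURCE for the read-back,
    * then back to COPY_DEST once the copies are recorded.
    */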
   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   if (raw_copy) {
      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                                  qpool->collect_buffer,
                                                  dzn_query_pool_get_result_offset(qpool, firstQuery),
                                                  dzn_query_pool_get_result_size(qpool, queryCount));
   } else {
      uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);

      for (uint32_t q = 0; q < queryCount; q++) {
         uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
         uint32_t dst_counter_offset = 0;

         if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
            /* Only copy out the counters the pool was created with, in the
             * order they appear in D3D12_QUERY_DATA_PIPELINE_STATISTICS. */
            for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
               if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
                  continue;

               ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                           qpool->collect_buffer,
                                                           res_offset + (c * sizeof(uint64_t)),
                                                           step);
               dst_counter_offset += step;
            }
         } else {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                                        qpool->collect_buffer,
                                                        res_offset, step);
            dst_counter_offset += step;
         }

         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
                                                        step);
         }

         dstOffset += stride;
      }
   }

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            0);
}

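/* vkCmdDispatchIndirect() can't map to a plain Dispatch() call: the group
 * counts live in a GPU buffer but must also reach the shader through the
 * group-count sysvals, so the dispatch goes through ExecuteIndirect() with
 * a pipeline-specific command signature.
 */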
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                        VkBuffer buffer,
                        VkDeviceSize offset)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   VK_FROM_HANDLE(dzn_buffer, buf, buffer);

   /* The group counts are unknown at record time; zero the sysvals and mark
    * them dirty so the root constants get re-emitted. */
   cmdbuf->state.sysvals.compute.group_count_x = 0;
   cmdbuf->state.sysvals.compute.group_count_y = 0;
   cmdbuf->state.sysvals.compute.group_count_z = 0;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

   dzn_cmd_buffer_prepare_dispatch(cmdbuf);

   struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
   ID3D12CommandSignature *cmdsig =
      dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);

   if (!cmdsig) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
                                        D3D12_HEAP_TYPE_DEFAULT,
                                        D3D12_RESOURCE_STATE_COPY_DEST,
                                        &exec_buf);
   if (result != VK_SUCCESS)
      return;

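   /* The exec buffer holds two copies of the dispatch arguments: the command
    * signature presumably consumes the first as the root constants backing
    * the group-count sysvals, and the second as the actual
    * D3D12_DISPATCH_ARGUMENTS.
    */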
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
                                               buf->res,
                                               offset,
                                               sizeof(D3D12_DISPATCH_ARGUMENTS));
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
                                               buf->res,
                                               offset,
                                               sizeof(D3D12_DISPATCH_ARGUMENTS));

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
}

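/* D3D12 has no wide-line rasterization, so the wideLines feature isn't
 * exposed and the only width the spec allows here is 1.0.
 */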
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
                    float lineWidth)
{
   assert(lineWidth == 1.0f);
}

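/* Depth-bias state is part of the rasterizer description baked into D3D12
 * PSOs, so changing it dynamically means switching to a pipeline variant;
 * hence the pipeline dirty flag rather than a direct command-list call.
 */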
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
                    float depthBiasConstantFactor,
                    float depthBiasClamp,
                    float depthBiasSlopeFactor)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
   cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
   cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

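/* Blend constants map directly to OMSetBlendFactor(); the dirty flag defers
 * the call to the next state flush.
 */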
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
                         const float blendConstants[4])
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   memcpy(cmdbuf->state.blend.constants, blendConstants,
          sizeof(cmdbuf->state.blend.constants));
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
}

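/* OMSetDepthBounds() is only available when the device reports
 * DepthBoundsTestSupported; otherwise the state is dropped, which is fine
 * since the depthBounds feature can't have been enabled in that case.
 */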
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
                      float minDepthBounds,
                      float maxDepthBounds)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   if (pdev->options2.DepthBoundsTestSupported) {
      cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
      cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
   }
}

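/* Stencil compare/write masks are baked into the D3D12 depth-stencil
 * description (a single mask shared by both faces), so dynamic per-face
 * masks are handled through pipeline variants in addition to the zsa state
 * consumed at flush time.
 */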
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
                             VkStencilFaceFlags faceMask,
                             uint32_t compareMask)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
      cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
      cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
   }

   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
      cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
      cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
   }

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
                           VkStencilFaceFlags faceMask,
                           uint32_t writeMask)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
      cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
      cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
   }

   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
      cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
      cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
   }

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

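/* Unlike the masks, the stencil reference is natively dynamic in D3D12
 * (OMSetStencilRef), so no pipeline variant is needed here.
 */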
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
                           VkStencilFaceFlags faceMask,
                           uint32_t reference)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      cmdbuf->state.zsa.stencil_test.front.ref = reference;

   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      cmdbuf->state.zsa.stencil_test.back.ref = reference;

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
}