• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "vk_alloc.h"
27 #include "vk_debug_report.h"
28 #include "vk_format.h"
29 #include "vk_util.h"
30 
31 
32 static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer * cmdbuf,D3D12_RESOURCE_BARRIER * barriers,uint32_t barrier_count)33 dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
34                                         D3D12_RESOURCE_BARRIER *barriers,
35                                         uint32_t barrier_count)
36 {
37    uint32_t flush_count = 0;
38    for (uint32_t b = 0; b < barrier_count; b++) {
39       assert(barriers[b].Transition.pResource);
40 
41       /* some layouts map to the same states, and NOP-barriers are illegal */
42       if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
43          if (flush_count) {
44             ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
45                                                        &barriers[b - flush_count]);
46             flush_count = 0;
47          }
48       } else {
49          flush_count++;
50       }
51    }
52 
53    if (flush_count)
54       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
55                                                  &barriers[barrier_count - flush_count]);
56 
57    /* Set Before = After so we don't execute the same barrier twice. */
58    for (uint32_t b = 0; b < barrier_count; b++)
59       barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
60 }
61 
62 static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count)63 dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
64                                          ID3D12Resource *res,
65                                          uint32_t first_subres,
66                                          uint32_t subres_count)
67 {
68    struct hash_entry *he =
69       _mesa_hash_table_search(cmdbuf->transition_barriers, res);
70    D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
71 
72    if (!barriers)
73       return;
74 
75    dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
76 }
77 
/* Bit flags accepted by the dzn_cmd_buffer_queue_*_transition*() helpers. */
enum dzn_queue_transition_flags {
   /* Execute the queued barriers right away instead of leaving them pending. */
   DZN_QUEUE_TRANSITION_FLUSH = 0x1,
   /* Ignore the caller-provided "before" state for subresources that are
    * already tracked and use their currently tracked state instead.
    */
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 0x2,
};
82 
83 static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)84 dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
85                                          ID3D12Resource *res,
86                                          uint32_t first_subres,
87                                          uint32_t subres_count,
88                                          D3D12_RESOURCE_STATES before,
89                                          D3D12_RESOURCE_STATES after,
90                                          uint32_t flags)
91 {
92    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
93    struct hash_entry *he =
94       _mesa_hash_table_search(cmdbuf->transition_barriers, res);
95    struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
96 
97    if (!barriers) {
98       D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
99       D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
100       ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
101       uint32_t barrier_count =
102          fmt_info.PlaneCount *
103          desc.MipLevels * desc.DepthOrArraySize;
104 
105       barriers =
106          vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
107                    8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
108       if (!barriers) {
109          cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
110          return cmdbuf->error;
111       }
112 
113       he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
114       if (!he) {
115          vk_free(&cmdbuf->vk.pool->alloc, barriers);
116          cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
117          return cmdbuf->error;
118       }
119    }
120 
121    for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
122       if (!barriers[subres].Transition.pResource) {
123          barriers[subres] = (D3D12_RESOURCE_BARRIER) {
124             .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
125             .Flags = 0,
126             .Transition = {
127                .pResource = res,
128                .Subresource = subres,
129                .StateBefore = before,
130                .StateAfter = after,
131             },
132          };
133       } else {
134 	 if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
135             before = barriers[subres].Transition.StateAfter;
136 
137          assert(barriers[subres].Transition.StateAfter == before ||
138                 barriers[subres].Transition.StateAfter == after);
139          barriers[subres].Transition.StateAfter = after;
140       }
141    }
142 
143    if (flags & DZN_QUEUE_TRANSITION_FLUSH)
144       dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
145 
146    return VK_SUCCESS;
147 }
148 
149 static VkResult
dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const VkImageSubresourceRange * range,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)150 dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
151                                                   const struct dzn_image *image,
152                                                   const VkImageSubresourceRange *range,
153                                                   D3D12_RESOURCE_STATES before,
154                                                   D3D12_RESOURCE_STATES after,
155                                                   uint32_t flags)
156 {
157    uint32_t first_barrier = 0, barrier_count = 0;
158    VkResult ret = VK_SUCCESS;
159 
160    dzn_foreach_aspect(aspect, range->aspectMask) {
161       uint32_t layer_count = dzn_get_layer_count(image, range);
162       uint32_t level_count = dzn_get_level_count(image, range);
163       for (uint32_t layer = 0; layer < layer_count; layer++) {
164          uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
165          if (!barrier_count) {
166             first_barrier = subres;
167             barrier_count = level_count;
168             continue;
169          } else if (first_barrier + barrier_count == subres) {
170             barrier_count += level_count;
171             continue;
172          }
173 
174          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
175                                                         first_barrier, barrier_count,
176                                                         before, after, flags);
177          if (ret != VK_SUCCESS)
178             return ret;
179 
180          barrier_count = 0;
181       }
182 
183       if (barrier_count) {
184          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
185                                                         first_barrier, barrier_count,
186                                                         before, after, flags);
187          if (ret != VK_SUCCESS)
188             return ret;
189       }
190    }
191 
192    return VK_SUCCESS;
193 }
194 
195 static VkResult
dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const VkImageSubresourceRange * range,VkImageLayout old_layout,VkImageLayout new_layout,uint32_t flags)196 dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
197                                                    const struct dzn_image *image,
198                                                    const VkImageSubresourceRange *range,
199                                                    VkImageLayout old_layout,
200                                                    VkImageLayout new_layout,
201                                                    uint32_t flags)
202 {
203    uint32_t first_barrier = 0, barrier_count = 0;
204    VkResult ret = VK_SUCCESS;
205 
206    if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
207       flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;
208 
209    dzn_foreach_aspect(aspect, range->aspectMask) {
210       D3D12_RESOURCE_STATES after =
211          dzn_image_layout_to_state(image, new_layout, aspect);
212       D3D12_RESOURCE_STATES before =
213          (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
214           old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
215          image->mem->initial_state :
216          dzn_image_layout_to_state(image, old_layout, aspect);
217 
218       uint32_t layer_count = dzn_get_layer_count(image, range);
219       uint32_t level_count = dzn_get_level_count(image, range);
220       for (uint32_t layer = 0; layer < layer_count; layer++) {
221          uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
222          if (!barrier_count) {
223             first_barrier = subres;
224             barrier_count = level_count;
225             continue;
226          } else if (first_barrier + barrier_count == subres) {
227             barrier_count += level_count;
228             continue;
229          }
230 
231          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
232                                                         first_barrier, barrier_count,
233                                                         before, after, flags);
234          if (ret != VK_SUCCESS)
235             return ret;
236 
237          barrier_count = 0;
238       }
239 
240       if (barrier_count) {
241          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
242                                                         first_barrier, barrier_count,
243                                                         before, after, flags);
244          if (ret != VK_SUCCESS)
245             return ret;
246       }
247    }
248 
249    return VK_SUCCESS;
250 }
251 
252 static void
dzn_cmd_buffer_destroy(struct vk_command_buffer * cbuf)253 dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
254 {
255    if (!cbuf)
256       return;
257 
258    struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);
259 
260    if (cmdbuf->cmdlist)
261       ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
262 
263    if (cmdbuf->cmdalloc)
264       ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);
265 
266    list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
267       list_del(&res->link);
268       ID3D12Resource_Release(res->res);
269       vk_free(&cbuf->pool->alloc, res);
270    }
271 
272    dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
273    dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
274    dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
275    dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
276    util_dynarray_fini(&cmdbuf->events.wait);
277    util_dynarray_fini(&cmdbuf->events.signal);
278    util_dynarray_fini(&cmdbuf->queries.reset);
279    util_dynarray_fini(&cmdbuf->queries.wait);
280    util_dynarray_fini(&cmdbuf->queries.signal);
281 
282    if (cmdbuf->rtvs.ht) {
283       hash_table_foreach(cmdbuf->rtvs.ht, he)
284          vk_free(&cbuf->pool->alloc, he->data);
285       _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
286    }
287 
288    if (cmdbuf->dsvs.ht) {
289       hash_table_foreach(cmdbuf->dsvs.ht, he)
290          vk_free(&cbuf->pool->alloc, he->data);
291       _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
292    }
293 
294    if (cmdbuf->events.ht)
295       _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);
296 
297    if (cmdbuf->queries.ht) {
298       hash_table_foreach(cmdbuf->queries.ht, he) {
299          struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
300          util_dynarray_fini(&qpstate->reset);
301          util_dynarray_fini(&qpstate->collect);
302          util_dynarray_fini(&qpstate->wait);
303          util_dynarray_fini(&qpstate->signal);
304          vk_free(&cbuf->pool->alloc, he->data);
305       }
306       _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
307    }
308 
309    if (cmdbuf->transition_barriers) {
310       hash_table_foreach(cmdbuf->transition_barriers, he)
311          vk_free(&cbuf->pool->alloc, he->data);
312       _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
313    }
314 
315    vk_command_buffer_finish(&cmdbuf->vk);
316    vk_free(&cbuf->pool->alloc, cmdbuf);
317 }
318 
319 static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void * key)320 dzn_cmd_buffer_rtv_key_hash_function(const void *key)
321 {
322    return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
323 }
324 
325 static bool
dzn_cmd_buffer_rtv_key_equals_function(const void * a,const void * b)326 dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
327 {
328    return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
329 }
330 
331 static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void * key)332 dzn_cmd_buffer_dsv_key_hash_function(const void *key)
333 {
334    return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
335 }
336 
337 static bool
dzn_cmd_buffer_dsv_key_equals_function(const void * a,const void * b)338 dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
339 {
340    return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
341 }
342 
343 static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo * info,VkCommandBuffer * out)344 dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
345                       VkCommandBuffer *out)
346 {
347    VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
348    struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
349    struct dzn_physical_device *pdev =
350       container_of(device->vk.physical, struct dzn_physical_device, vk);
351 
352    assert(pool->queue_family_index < pdev->queue_family_count);
353 
354    D3D12_COMMAND_LIST_TYPE type =
355       pdev->queue_families[pool->queue_family_index].desc.Type;
356 
357    struct dzn_cmd_buffer *cmdbuf =
358       vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
359    if (!cmdbuf)
360       return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
361 
362    VkResult result =
363       vk_command_buffer_init(&cmdbuf->vk, pool, info->level);
364    if (result != VK_SUCCESS) {
365       vk_free(&pool->alloc, cmdbuf);
366       return result;
367    }
368 
369    memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
370    list_inithead(&cmdbuf->internal_bufs);
371    util_dynarray_init(&cmdbuf->events.wait, NULL);
372    util_dynarray_init(&cmdbuf->events.signal, NULL);
373    util_dynarray_init(&cmdbuf->queries.reset, NULL);
374    util_dynarray_init(&cmdbuf->queries.wait, NULL);
375    util_dynarray_init(&cmdbuf->queries.signal, NULL);
376    dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
377                                  D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
378                                  false, &pool->alloc);
379    dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
380                                  D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
381                                  false, &pool->alloc);
382    dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
383                                  D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
384                                  true, &pool->alloc);
385    dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
386                                  D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
387                                  true, &pool->alloc);
388 
389    cmdbuf->events.ht =
390       _mesa_pointer_hash_table_create(NULL);
391    cmdbuf->queries.ht =
392       _mesa_pointer_hash_table_create(NULL);
393    cmdbuf->transition_barriers =
394       _mesa_pointer_hash_table_create(NULL);
395    cmdbuf->rtvs.ht =
396       _mesa_hash_table_create(NULL,
397                               dzn_cmd_buffer_rtv_key_hash_function,
398                               dzn_cmd_buffer_rtv_key_equals_function);
399    cmdbuf->dsvs.ht =
400       _mesa_hash_table_create(NULL,
401                               dzn_cmd_buffer_dsv_key_hash_function,
402                               dzn_cmd_buffer_dsv_key_equals_function);
403    if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
404        !cmdbuf->transition_barriers ||
405        !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
406       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
407       goto out;
408    }
409 
410    cmdbuf->vk.destroy = dzn_cmd_buffer_destroy;
411 
412    if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
413                                                    &IID_ID3D12CommandAllocator,
414                                                    (void **)&cmdbuf->cmdalloc))) {
415       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
416       goto out;
417    }
418 
419    if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, type,
420                                               cmdbuf->cmdalloc, NULL,
421                                               &IID_ID3D12GraphicsCommandList1,
422                                               (void **)&cmdbuf->cmdlist))) {
423       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
424       goto out;
425    }
426 
427 out:
428    if (result != VK_SUCCESS)
429       dzn_cmd_buffer_destroy(&cmdbuf->vk);
430    else
431       *out = dzn_cmd_buffer_to_handle(cmdbuf);
432 
433    return result;
434 }
435 
436 static VkResult
dzn_cmd_buffer_reset(struct dzn_cmd_buffer * cmdbuf)437 dzn_cmd_buffer_reset(struct dzn_cmd_buffer *cmdbuf)
438 {
439    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
440    const struct dzn_physical_device *pdev =
441       container_of(device->vk.physical, struct dzn_physical_device, vk);
442    const struct vk_command_pool *pool = cmdbuf->vk.pool;
443 
444    /* Reset the state */
445    memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
446 
447    /* TODO: Return resources to the pool */
448    list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
449       list_del(&res->link);
450       ID3D12Resource_Release(res->res);
451       vk_free(&cmdbuf->vk.pool->alloc, res);
452    }
453 
454    cmdbuf->error = VK_SUCCESS;
455    util_dynarray_clear(&cmdbuf->events.wait);
456    util_dynarray_clear(&cmdbuf->events.signal);
457    util_dynarray_clear(&cmdbuf->queries.reset);
458    util_dynarray_clear(&cmdbuf->queries.wait);
459    util_dynarray_clear(&cmdbuf->queries.signal);
460    hash_table_foreach(cmdbuf->rtvs.ht, he)
461       vk_free(&cmdbuf->vk.pool->alloc, he->data);
462    _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
463    cmdbuf->null_rtv.ptr = 0;
464    dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
465    hash_table_foreach(cmdbuf->dsvs.ht, he)
466       vk_free(&cmdbuf->vk.pool->alloc, he->data);
467    _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
468    hash_table_foreach(cmdbuf->queries.ht, he) {
469       struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
470       util_dynarray_fini(&qpstate->reset);
471       util_dynarray_fini(&qpstate->collect);
472       util_dynarray_fini(&qpstate->wait);
473       util_dynarray_fini(&qpstate->signal);
474       vk_free(&cmdbuf->vk.pool->alloc, he->data);
475    }
476    _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
477    _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
478    hash_table_foreach(cmdbuf->transition_barriers, he)
479       vk_free(&cmdbuf->vk.pool->alloc, he->data);
480    _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
481    dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
482    dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
483    dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
484    vk_command_buffer_reset(&cmdbuf->vk);
485 
486    /* cmdlist->Reset() doesn't return the memory back the the command list
487     * allocator, and cmdalloc->Reset() can only be called if there's no live
488     * cmdlist allocated from the allocator, so we need to release and create
489     * a new command list.
490     */
491    ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
492    cmdbuf->cmdlist = NULL;
493    ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
494    D3D12_COMMAND_LIST_TYPE type =
495       pdev->queue_families[pool->queue_family_index].desc.Type;
496    if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0,
497                                               type,
498                                               cmdbuf->cmdalloc, NULL,
499                                               &IID_ID3D12GraphicsCommandList1,
500                                               (void **)&cmdbuf->cmdlist))) {
501       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
502    }
503 
504    return cmdbuf->error;
505 }
506 
507 VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,const VkCommandBufferAllocateInfo * pAllocateInfo,VkCommandBuffer * pCommandBuffers)508 dzn_AllocateCommandBuffers(VkDevice device,
509                            const VkCommandBufferAllocateInfo *pAllocateInfo,
510                            VkCommandBuffer *pCommandBuffers)
511 {
512    VK_FROM_HANDLE(dzn_device, dev, device);
513    VkResult result = VK_SUCCESS;
514    uint32_t i;
515 
516    for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
517       result = dzn_cmd_buffer_create(pAllocateInfo,
518                                      &pCommandBuffers[i]);
519       if (result != VK_SUCCESS)
520          break;
521    }
522 
523    if (result != VK_SUCCESS) {
524       dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
525                                                 i, pCommandBuffers);
526       for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
527          pCommandBuffers[i] = VK_NULL_HANDLE;
528    }
529 
530    return result;
531 }
532 
533 VKAPI_ATTR VkResult VKAPI_CALL
dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,VkCommandBufferResetFlags flags)534 dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
535                        VkCommandBufferResetFlags flags)
536 {
537    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
538 
539    return dzn_cmd_buffer_reset(cmdbuf);
540 }
541 
542 VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,const VkCommandBufferBeginInfo * info)543 dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
544                        const VkCommandBufferBeginInfo *info)
545 {
546    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
547 
548    /* If this is the first vkBeginCommandBuffer, we must *initialize* the
549     * command buffer's state. Otherwise, we must *reset* its state. In both
550     * cases we reset it.
551     *
552     * From the Vulkan 1.0 spec:
553     *
554     *    If a command buffer is in the executable state and the command buffer
555     *    was allocated from a command pool with the
556     *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
557     *    vkBeginCommandBuffer implicitly resets the command buffer, behaving
558     *    as if vkResetCommandBuffer had been called with
559     *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
560     *    the command buffer in the recording state.
561     */
562    return dzn_cmd_buffer_reset(cmdbuf);
563 }
564 
565 static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer * cmdbuf)566 dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
567 {
568    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
569 
570    if (cmdbuf->error != VK_SUCCESS)
571       goto out;
572 
573    hash_table_foreach(cmdbuf->events.ht, he) {
574       enum dzn_event_state state = (uintptr_t)he->data;
575 
576       if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) {
577          struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state  == DZN_EVENT_STATE_SET };
578          struct dzn_cmd_event_signal *entry =
579             util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);
580 
581          if (!entry) {
582             cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
583             break;
584          }
585 
586          *entry = signal;
587       }
588    }
589 
590 out:
591    _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
592 }
593 
594 static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)595 dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
596 {
597    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
598 
599    if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
600       return VK_SUCCESS;
601 
602    unsigned old_sz = array->size;
603    void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
604    if (!ptr) {
605       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
606       return cmdbuf->error;
607    }
608 
609    memset(ptr, 0, array->size - old_sz);
610    return VK_SUCCESS;
611 }
612 
613 static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray * array,uint32_t bit)614 dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
615 {
616    uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
617 
618    if (bit < nbits)
619       return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);
620 
621    return false;
622 }
623 
624 static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)625 dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
626 {
627    VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
628    if (result != VK_SUCCESS)
629       return result;
630 
631    BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
632    return VK_SUCCESS;
633 }
634 
635 static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)636 dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
637 {
638    if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
639       return;
640 
641    BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
642 }
643 
644 static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)645 dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
646                                    struct util_dynarray *array,
647                                    uint32_t bit, uint32_t count)
648 {
649    VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
650    if (result != VK_SUCCESS)
651       return result;
652 
653    BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
654    return VK_SUCCESS;
655 }
656 
657 static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)658 dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
659                                      struct util_dynarray *array,
660                                      uint32_t bit, uint32_t count)
661 {
662    uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
663 
664    if (!nbits)
665       return;
666 
667    uint32_t end = MIN2(bit + count, nbits) - 1;
668 
669    while (bit <= end) {
670       uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
671       BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
672       bit += subcount;
673    }
674 }
675 
676 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer * cmdbuf)677 dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
678 {
679    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
680    struct dzn_cmd_buffer_query_pool_state *state =
681       vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
682                8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
683    if (!state) {
684       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
685       return NULL;
686    }
687 
688    util_dynarray_init(&state->reset, NULL);
689    util_dynarray_init(&state->collect, NULL);
690    util_dynarray_init(&state->wait, NULL);
691    util_dynarray_init(&state->signal, NULL);
692    return state;
693 }
694 
695 static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_cmd_buffer_query_pool_state * state)696 dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
697                                         struct dzn_cmd_buffer_query_pool_state *state)
698 {
699    util_dynarray_fini(&state->reset);
700    util_dynarray_fini(&state->collect);
701    util_dynarray_fini(&state->wait);
702    util_dynarray_fini(&state->signal);
703    vk_free(&cmdbuf->vk.pool->alloc, state);
704 }
705 
706 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool)707 dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
708                                     struct dzn_query_pool *qpool)
709 {
710    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
711    struct dzn_cmd_buffer_query_pool_state *state = NULL;
712    struct hash_entry *he =
713       _mesa_hash_table_search(cmdbuf->queries.ht, qpool);
714 
715    if (!he) {
716       state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
717       if (!state)
718          return NULL;
719 
720       he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
721       if (!he) {
722          dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
723          cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
724          return NULL;
725       }
726    } else {
727       state = he->data;
728    }
729 
730    return state;
731 }
732 
733 static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer * cmdbuf,const struct dzn_query_pool * qpool,struct dzn_cmd_buffer_query_pool_state * state,uint32_t first_query,uint32_t query_count)734 dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
735                                const struct dzn_query_pool *qpool,
736                                struct dzn_cmd_buffer_query_pool_state *state,
737                                uint32_t first_query,
738                                uint32_t query_count)
739 {
740    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
741    uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
742    uint32_t start, end;
743 
744    if (!nbits)
745       return VK_SUCCESS;
746 
747    query_count = MIN2(query_count, nbits - first_query);
748    nbits = MIN2(first_query + query_count, nbits);
749 
750    VkResult result =
751       dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
752    if (result != VK_SUCCESS)
753       return result;
754 
755    dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);
756 
757    BITSET_WORD *collect =
758       util_dynarray_element(&state->collect, BITSET_WORD, 0);
759 
760    for (start = first_query, end = first_query,
761         __bitset_next_range(&start, &end, collect, nbits);
762         start < nbits;
763         __bitset_next_range(&start, &end, collect, nbits)) {
764       ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
765                                                   qpool->heap,
766                                                   qpool->queries[start].type,
767                                                   start, end - start,
768                                                   qpool->resolve_buffer,
769                                                   qpool->query_size * start);
770    }
771 
772    uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
773    uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);
774 
775    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
776                                             0, 1,
777                                             D3D12_RESOURCE_STATE_COPY_DEST,
778                                             D3D12_RESOURCE_STATE_COPY_SOURCE,
779                                             DZN_QUEUE_TRANSITION_FLUSH);
780 
781    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
782                                                qpool->collect_buffer, offset,
783                                                qpool->resolve_buffer, offset,
784                                                size);
785 
786    for (start = first_query, end = first_query,
787         __bitset_next_range(&start, &end, collect, nbits);
788         start < nbits;
789         __bitset_next_range(&start, &end, collect, nbits)) {
790       uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
791       uint32_t count = end - start;
792 
793       for (unsigned i = 0; i < count; i += step) {
794          uint32_t sub_count = MIN2(step, count - i);
795 
796          ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
797                                                      qpool->collect_buffer,
798                                                      dzn_query_pool_get_availability_offset(qpool, start + i),
799                                                      device->queries.refs,
800                                                      DZN_QUERY_REFS_ALL_ONES_OFFSET,
801                                                      sizeof(uint64_t) * sub_count);
802       }
803 
804       dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
805       dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count);
806    }
807 
808    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
809                                             0, 1,
810                                             D3D12_RESOURCE_STATE_COPY_SOURCE,
811                                             D3D12_RESOURCE_STATE_COPY_DEST,
812                                             0);
813    return VK_SUCCESS;
814 }
815 
816 static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool,struct util_dynarray * bitset_array,struct util_dynarray * ops_array)817 dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
818                                  struct dzn_query_pool *qpool,
819                                  struct util_dynarray *bitset_array,
820                                  struct util_dynarray *ops_array)
821 {
822    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
823    BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
824    uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
825    uint32_t start, end;
826 
827    BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
828       struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
829       struct dzn_cmd_buffer_query_range *entry =
830          util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);
831 
832       if (!entry) {
833          cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
834          return cmdbuf->error;
835       }
836 
837       *entry = range;
838    }
839 
840    return VK_SUCCESS;
841 }
842 
843 static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer * cmdbuf)844 dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
845 {
846    hash_table_foreach(cmdbuf->queries.ht, he) {
847       struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
848       struct dzn_cmd_buffer_query_pool_state *state = he->data;
849       VkResult result =
850          dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
851       if (result != VK_SUCCESS)
852          return result;
853 
854       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
855       if (result != VK_SUCCESS)
856          return result;
857 
858       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait);
859       if (result != VK_SUCCESS)
860          return result;
861 
862       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
863       if (result != VK_SUCCESS)
864          return result;
865    }
866 
867    return VK_SUCCESS;
868 }
869 
870 VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)871 dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
872 {
873    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
874 
875    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
876       dzn_cmd_buffer_gather_events(cmdbuf);
877       dzn_cmd_buffer_gather_queries(cmdbuf);
878       HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
879       if (FAILED(hres))
880          cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
881    } else {
882       cmdbuf->error = cmdbuf->vk.cmd_queue.error;
883    }
884 
885    return cmdbuf->error;
886 }
887 
888 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,const VkDependencyInfo * info)889 dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
890                         const VkDependencyInfo *info)
891 {
892    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
893 
894    bool execution_barrier =
895       !info->memoryBarrierCount &&
896       !info->bufferMemoryBarrierCount &&
897       !info->imageMemoryBarrierCount;
898 
899    if (execution_barrier) {
900       /* Execution barrier can be emulated with a NULL UAV barrier (AKA
901        * pipeline flush). That's the best we can do with the standard D3D12
902        * barrier API.
903        */
904       D3D12_RESOURCE_BARRIER barrier = {
905          .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
906          .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
907          .UAV = { .pResource = NULL },
908       };
909 
910       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
911    }
912 
913    /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
914     * Scopes are not taken into account, but that's inherent to the current
915     * D3D12 barrier API.
916     */
917    if (info->memoryBarrierCount) {
918       D3D12_RESOURCE_BARRIER barriers[2] = { 0 };
919 
920       barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
921       barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
922       barriers[0].UAV.pResource = NULL;
923       barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
924       barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
925       barriers[1].Aliasing.pResourceBefore = NULL;
926       barriers[1].Aliasing.pResourceAfter = NULL;
927       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
928    }
929 
930    for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
931       VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
932       D3D12_RESOURCE_BARRIER barrier = { 0 };
933 
934       /* UAV are used only for storage buffers, skip all other buffers. */
935       if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
936          continue;
937 
938       barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
939       barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
940       barrier.UAV.pResource = buf->res;
941       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
942    }
943 
944    for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
945       const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
946       const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
947       VK_FROM_HANDLE(dzn_image, image, ibarrier->image);
948 
949       /* We use placed resource's simple model, in which only one resource
950        * pointing to a given heap is active at a given time. To make the
951        * resource active we need to add an aliasing barrier.
952        */
953       D3D12_RESOURCE_BARRIER aliasing_barrier = {
954          .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING,
955          .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
956          .Aliasing = {
957             .pResourceBefore = NULL,
958             .pResourceAfter = image->res,
959          },
960       };
961 
962       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &aliasing_barrier);
963 
964       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
965                                                          ibarrier->oldLayout,
966                                                          ibarrier->newLayout,
967                                                          DZN_QUEUE_TRANSITION_FLUSH);
968    }
969 }
970 
971 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_DEPTH_STENCIL_VIEW_DESC * desc)972 dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
973                        const struct dzn_image *image,
974                        const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
975 {
976    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
977    struct dzn_cmd_buffer_dsv_key key = { image, *desc };
978    struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
979    struct dzn_cmd_buffer_dsv_entry *dsve;
980 
981    if (!he) {
982       struct dzn_descriptor_heap *heap;
983       uint32_t slot;
984 
985       // TODO: error handling
986       dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
987                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
988       dsve->key = key;
989       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
990       dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
991       ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
992       _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
993    } else {
994       dsve = he->data;
995    }
996 
997    return dsve->handle;
998 }
999 
1000 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_RENDER_TARGET_VIEW_DESC * desc)1001 dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
1002                        const struct dzn_image *image,
1003                        const D3D12_RENDER_TARGET_VIEW_DESC *desc)
1004 {
1005    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1006    struct dzn_cmd_buffer_rtv_key key = { image, *desc };
1007    struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
1008    struct dzn_cmd_buffer_rtv_entry *rtve;
1009 
1010    if (!he) {
1011       struct dzn_descriptor_heap *heap;
1012       uint32_t slot;
1013 
1014       // TODO: error handling
1015       rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
1016                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1017       rtve->key = key;
1018       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1019       rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1020       ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
1021       he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
1022    } else {
1023       rtve = he->data;
1024    }
1025 
1026    return rtve->handle;
1027 }
1028 
1029 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer * cmdbuf)1030 dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
1031 {
1032    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1033 
1034    if (!cmdbuf->null_rtv.ptr) {
1035       struct dzn_descriptor_heap *heap;
1036       uint32_t slot;
1037       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1038       cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1039 
1040       D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
1041       desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
1042       desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
1043       desc.Texture2D.MipSlice = 0;
1044       desc.Texture2D.PlaneSlice = 0;
1045 
1046       ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
1047    }
1048 
1049    return cmdbuf->null_rtv;
1050 }
1051 
1052 static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer * cmdbuf,uint32_t size,D3D12_HEAP_TYPE heap_type,D3D12_RESOURCE_STATES init_state,ID3D12Resource ** out)1053 dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
1054                                   uint32_t size,
1055                                   D3D12_HEAP_TYPE heap_type,
1056                                   D3D12_RESOURCE_STATES init_state,
1057                                   ID3D12Resource **out)
1058 {
1059    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1060    ID3D12Resource *res;
1061    *out = NULL;
1062 
1063    /* Align size on 64k (the default alignment) */
1064    size = ALIGN_POT(size, 64 * 1024);
1065 
1066    D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, heap_type);
1067    D3D12_RESOURCE_DESC rdesc = {
1068       .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
1069       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
1070       .Width = size,
1071       .Height = 1,
1072       .DepthOrArraySize = 1,
1073       .MipLevels = 1,
1074       .Format = DXGI_FORMAT_UNKNOWN,
1075       .SampleDesc = { .Count = 1, .Quality = 0 },
1076       .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
1077       .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
1078    };
1079 
1080    HRESULT hres =
1081       ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
1082                                             D3D12_HEAP_FLAG_NONE, &rdesc,
1083                                             init_state, NULL,
1084                                             &IID_ID3D12Resource,
1085                                             (void **)&res);
1086    if (FAILED(hres)) {
1087       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1088       return cmdbuf->error;
1089    }
1090 
1091    struct dzn_internal_resource *entry =
1092       vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
1093                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1094    if (!entry) {
1095       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1096       ID3D12Resource_Release(res);
1097       return cmdbuf->error;
1098    }
1099 
1100    entry->res = res;
1101    list_addtail(&entry->link, &cmdbuf->internal_bufs);
1102    *out = entry->res;
1103    return VK_SUCCESS;
1104 }
1105 
1106 static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearColorValue * color,const VkImageSubresourceRange * range,uint32_t rect_count,D3D12_RECT * rects)1107 dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
1108                                      const struct dzn_image *image,
1109                                      VkImageLayout layout,
1110                                      const VkClearColorValue *color,
1111                                      const VkImageSubresourceRange *range,
1112                                      uint32_t rect_count, D3D12_RECT *rects)
1113 {
1114    enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
1115    uint32_t blksize = util_format_get_blocksize(pfmt);
1116    uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
1117    uint32_t raw[4] = { 0 };
1118 
1119    assert(blksize <= sizeof(raw));
1120    assert(!(sizeof(buf) % blksize));
1121 
1122    util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);
1123 
1124    uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1125    while (fill_step % blksize)
1126       fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1127 
1128    uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
1129    uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
1130    uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
1131    uint32_t res_size = max_h * row_pitch;
1132 
1133    assert(fill_step <= sizeof(buf));
1134 
1135    for (uint32_t i = 0; i < fill_step; i += blksize)
1136       memcpy(&buf[i], raw, blksize);
1137 
1138    ID3D12Resource *src_res;
1139 
1140    VkResult result =
1141       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
1142                                         D3D12_HEAP_TYPE_UPLOAD,
1143                                         D3D12_RESOURCE_STATE_GENERIC_READ,
1144                                         &src_res);
1145    if (result != VK_SUCCESS)
1146       return;
1147 
1148    assert(!(res_size % fill_step));
1149 
1150    uint8_t *cpu_ptr;
1151    ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
1152    for (uint32_t i = 0; i < res_size; i += fill_step)
1153       memcpy(&cpu_ptr[i], buf, fill_step);
1154 
1155    ID3D12Resource_Unmap(src_res, 0, NULL);
1156 
1157    D3D12_TEXTURE_COPY_LOCATION src_loc = {
1158       .pResource = src_res,
1159       .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
1160       .PlacedFootprint = {
1161          .Offset = 0,
1162          .Footprint = {
1163             .Width = max_w,
1164             .Height = max_h,
1165             .Depth = 1,
1166             .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
1167          },
1168       },
1169    };
1170 
1171    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
1172                                             D3D12_RESOURCE_STATE_GENERIC_READ,
1173                                             D3D12_RESOURCE_STATE_COPY_SOURCE,
1174                                             DZN_QUEUE_TRANSITION_FLUSH);
1175 
1176    dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1177                                                       layout,
1178                                                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1179                                                       DZN_QUEUE_TRANSITION_FLUSH);
1180 
1181    assert(dzn_get_level_count(image, range) == 1);
1182    uint32_t layer_count = dzn_get_layer_count(image, range);
1183 
1184    dzn_foreach_aspect(aspect, range->aspectMask) {
1185       VkImageSubresourceLayers subres = {
1186          .aspectMask = (VkImageAspectFlags)aspect,
1187          .mipLevel = range->baseMipLevel,
1188          .baseArrayLayer = range->baseArrayLayer,
1189          .layerCount = layer_count,
1190       };
1191 
1192       for (uint32_t layer = 0; layer < layer_count; layer++) {
1193          D3D12_TEXTURE_COPY_LOCATION dst_loc =
1194             dzn_image_get_copy_loc(image, &subres, aspect, layer);
1195 
1196          src_loc.PlacedFootprint.Footprint.Format =
1197             dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
1198             dst_loc.PlacedFootprint.Footprint.Format :
1199             image->desc.Format;
1200 
1201          for (uint32_t r = 0; r < rect_count; r++) {
1202             D3D12_BOX src_box = {
1203                .left = 0,
1204                .top = 0,
1205                .front = 0,
1206                .right = (UINT)(rects[r].right - rects[r].left),
1207                .bottom = (UINT)(rects[r].bottom - rects[r].top),
1208                .back = 1,
1209             };
1210 
1211             ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
1212                                                          &dst_loc,
1213                                                          rects[r].left,
1214                                                          rects[r].top, 0,
1215                                                          &src_loc,
1216                                                          &src_box);
1217          }
1218       }
1219    }
1220 
1221    dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1222                                                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1223                                                       layout,
1224                                                       DZN_QUEUE_TRANSITION_FLUSH);
1225 }
1226 
1227 static VkClearColorValue
adjust_clear_color(VkFormat format,const VkClearColorValue * col)1228 adjust_clear_color(VkFormat format, const VkClearColorValue *col)
1229 {
1230    VkClearColorValue out = *col;
1231 
1232    // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
1233    // manually where it matters, like here, in the clear path.
1234    if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
1235       DZN_SWAP(float, out.float32[0], out.float32[1]);
1236       DZN_SWAP(float, out.float32[2], out.float32[3]);
1237    }
1238 
1239    return out;
1240 }
1241 
1242 static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearColorValue * color,uint32_t range_count,const VkImageSubresourceRange * ranges)1243 dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
1244                                       const struct dzn_image *image,
1245                                       VkImageLayout layout,
1246                                       const VkClearColorValue *color,
1247                                       uint32_t range_count,
1248                                       const VkImageSubresourceRange *ranges)
1249 {
1250    enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
1251    uint32_t blksize = util_format_get_blocksize(pfmt);
1252    uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
1253    uint32_t raw[4] = { 0 };
1254 
1255    assert(blksize <= sizeof(raw));
1256    assert(!(sizeof(buf) % blksize));
1257 
1258    util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);
1259 
1260    uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1261    while (fill_step % blksize)
1262       fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1263 
1264    uint32_t res_size = 0;
1265    for (uint32_t r = 0; r < range_count; r++) {
1266       uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
1267       uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
1268       uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
1269       uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);
1270 
1271       res_size = MAX2(res_size, h * d * row_pitch);
1272    }
1273 
1274    assert(fill_step <= sizeof(buf));
1275 
1276    for (uint32_t i = 0; i < fill_step; i += blksize)
1277       memcpy(&buf[i], raw, blksize);
1278 
1279    ID3D12Resource *src_res;
1280 
1281    VkResult result =
1282       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
1283                                         D3D12_HEAP_TYPE_UPLOAD,
1284                                         D3D12_RESOURCE_STATE_GENERIC_READ,
1285                                         &src_res);
1286    if (result != VK_SUCCESS)
1287       return;
1288 
1289    assert(!(res_size % fill_step));
1290 
1291    uint8_t *cpu_ptr;
1292    ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
1293    for (uint32_t i = 0; i < res_size; i += fill_step)
1294       memcpy(&cpu_ptr[i], buf, fill_step);
1295 
1296    ID3D12Resource_Unmap(src_res, 0, NULL);
1297 
1298    D3D12_TEXTURE_COPY_LOCATION src_loc = {
1299       .pResource = src_res,
1300       .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
1301       .PlacedFootprint = {
1302          .Offset = 0,
1303       },
1304    };
1305 
1306    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
1307                                             D3D12_RESOURCE_STATE_GENERIC_READ,
1308                                             D3D12_RESOURCE_STATE_COPY_SOURCE,
1309                                             DZN_QUEUE_TRANSITION_FLUSH);
1310 
1311    for (uint32_t r = 0; r < range_count; r++) {
1312       uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
1313       uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);
1314 
1315       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
1316                                                          layout,
1317                                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1318                                                          DZN_QUEUE_TRANSITION_FLUSH);
1319 
1320       dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
1321          for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1322             uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
1323             uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
1324             uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
1325             VkImageSubresourceLayers subres = {
1326                .aspectMask = (VkImageAspectFlags)aspect,
1327                .mipLevel = ranges[r].baseMipLevel + lvl,
1328                .baseArrayLayer = ranges[r].baseArrayLayer,
1329                .layerCount = layer_count,
1330             };
1331 
1332             for (uint32_t layer = 0; layer < layer_count; layer++) {
1333                D3D12_TEXTURE_COPY_LOCATION dst_loc =
1334                   dzn_image_get_copy_loc(image, &subres, aspect, layer);
1335 
1336                src_loc.PlacedFootprint.Footprint.Format =
1337                   dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
1338                   dst_loc.PlacedFootprint.Footprint.Format :
1339                   image->desc.Format;
1340                src_loc.PlacedFootprint.Footprint.Width = w;
1341                src_loc.PlacedFootprint.Footprint.Height = h;
1342                src_loc.PlacedFootprint.Footprint.Depth = d;
1343                src_loc.PlacedFootprint.Footprint.RowPitch =
1344                   ALIGN_NPOT(w * blksize, fill_step);
1345                D3D12_BOX src_box = {
1346                   .left = 0,
1347                   .top = 0,
1348                   .front = 0,
1349                   .right = w,
1350                   .bottom = h,
1351                   .back = d,
1352                };
1353 
1354                ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
1355                                                   &src_loc, &src_box);
1356 
1357             }
1358          }
1359       }
1360 
1361       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
1362                                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1363                                                          layout,
1364                                                          DZN_QUEUE_TRANSITION_FLUSH);
1365    }
1366 }
1367 
1368 static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer * cmdbuf,struct dzn_image_view * view,VkImageLayout layout,const VkClearValue * value,VkImageAspectFlags aspects,uint32_t base_layer,uint32_t layer_count,uint32_t rect_count,D3D12_RECT * rects)1369 dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
1370                                 struct dzn_image_view *view,
1371                                 VkImageLayout layout,
1372                                 const VkClearValue *value,
1373                                 VkImageAspectFlags aspects,
1374                                 uint32_t base_layer,
1375                                 uint32_t layer_count,
1376                                 uint32_t rect_count,
1377                                 D3D12_RECT *rects)
1378 {
1379    struct dzn_image *image =
1380       container_of(view->vk.image, struct dzn_image, vk);
1381 
1382    VkImageSubresourceRange range = {
1383       .aspectMask = aspects,
1384       .baseMipLevel = view->vk.base_mip_level,
1385       .levelCount = 1,
1386       .baseArrayLayer = view->vk.base_array_layer + base_layer,
1387       .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
1388                     view->vk.layer_count - base_layer : layer_count,
1389    };
1390 
1391    layer_count = vk_image_subresource_layer_count(&image->vk, &range);
1392 
1393    if (vk_format_is_depth_or_stencil(view->vk.format)) {
1394       D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
1395 
1396       if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
1397          flags |= D3D12_CLEAR_FLAG_DEPTH;
1398       if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
1399          flags |= D3D12_CLEAR_FLAG_STENCIL;
1400 
1401       if (flags != 0) {
1402          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1403                                                             layout,
1404                                                             VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1405                                                             DZN_QUEUE_TRANSITION_FLUSH);
1406 
1407          D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
1408          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
1409          ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
1410                                                 value->depthStencil.depth,
1411                                                 value->depthStencil.stencil,
1412                                                 rect_count, rects);
1413 
1414          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1415                                                             VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1416                                                             layout,
1417                                                             DZN_QUEUE_TRANSITION_FLUSH);
1418       }
1419    } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1420       VkClearColorValue color = adjust_clear_color(view->vk.format, &value->color);
1421       bool clear_with_cpy = false;
1422       float vals[4];
1423 
1424       if (vk_format_is_sint(view->vk.format)) {
1425          for (uint32_t i = 0; i < 4; i++) {
1426             vals[i] = color.int32[i];
1427             if (color.int32[i] != (int32_t)vals[i]) {
1428                clear_with_cpy = true;
1429                break;
1430             }
1431          }
1432       } else if (vk_format_is_uint(view->vk.format)) {
1433          for (uint32_t i = 0; i < 4; i++) {
1434             vals[i] = color.uint32[i];
1435             if (color.uint32[i] != (uint32_t)vals[i]) {
1436                clear_with_cpy = true;
1437                break;
1438             }
1439          }
1440       } else {
1441          for (uint32_t i = 0; i < 4; i++)
1442             vals[i] = color.float32[i];
1443       }
1444 
1445       if (clear_with_cpy) {
1446          dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
1447                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1448                                               &value->color,
1449                                               &range, rect_count, rects);
1450       } else {
1451          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1452                                                             layout,
1453                                                             VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1454                                                             DZN_QUEUE_TRANSITION_FLUSH);
1455 
1456          D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
1457          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
1458          ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);
1459 
1460          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1461                                                             VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1462                                                             layout,
1463                                                             DZN_QUEUE_TRANSITION_FLUSH);
1464       }
1465    }
1466 }
1467 
1468 static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearColorValue * col,uint32_t range_count,const VkImageSubresourceRange * ranges)1469 dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
1470                            const struct dzn_image *image,
1471                            VkImageLayout layout,
1472                            const VkClearColorValue *col,
1473                            uint32_t range_count,
1474                            const VkImageSubresourceRange *ranges)
1475 {
1476    if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
1477       dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
1478       return;
1479    }
1480 
1481    VkClearColorValue color = adjust_clear_color(image->vk.format, col);
1482    float clear_vals[4];
1483 
1484    enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
1485 
1486    if (util_format_is_pure_sint(pfmt)) {
1487       for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
1488          clear_vals[c] = color.int32[c];
1489          if (color.int32[c] != (int32_t)clear_vals[c]) {
1490             dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
1491             return;
1492          }
1493       }
1494    } else if (util_format_is_pure_uint(pfmt)) {
1495       for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
1496          clear_vals[c] = color.uint32[c];
1497          if (color.uint32[c] != (uint32_t)clear_vals[c]) {
1498             dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
1499             return;
1500          }
1501       }
1502    } else {
1503       memcpy(clear_vals, color.float32, sizeof(clear_vals));
1504    }
1505 
1506    for (uint32_t r = 0; r < range_count; r++) {
1507       const VkImageSubresourceRange *range = &ranges[r];
1508       uint32_t level_count = dzn_get_level_count(image, range);
1509 
1510       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1511                                                          layout,
1512                                                          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1513                                                          DZN_QUEUE_TRANSITION_FLUSH);
1514       for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1515          VkImageSubresourceRange view_range = *range;
1516 
1517          if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
1518             view_range.baseArrayLayer = 0;
1519             view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
1520          }
1521 
1522          D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
1523          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
1524          ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
1525       }
1526 
1527       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1528                                                          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1529                                                          layout,
1530                                                          DZN_QUEUE_TRANSITION_FLUSH);
1531    }
1532 }
1533 
1534 static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearDepthStencilValue * zs,uint32_t range_count,const VkImageSubresourceRange * ranges)1535 dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
1536                         const struct dzn_image *image,
1537                         VkImageLayout layout,
1538                         const VkClearDepthStencilValue *zs,
1539                         uint32_t range_count,
1540                         const VkImageSubresourceRange *ranges)
1541 {
1542    assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
1543 
1544    for (uint32_t r = 0; r < range_count; r++) {
1545       const VkImageSubresourceRange *range = &ranges[r];
1546       uint32_t level_count = dzn_get_level_count(image, range);
1547 
1548       D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
1549 
1550       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
1551          flags |= D3D12_CLEAR_FLAG_DEPTH;
1552       if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
1553          flags |= D3D12_CLEAR_FLAG_STENCIL;
1554 
1555       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1556                                                          layout,
1557                                                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1558                                                          DZN_QUEUE_TRANSITION_FLUSH);
1559 
1560       for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1561          D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
1562          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
1563          ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
1564                                                           handle, flags,
1565                                                           zs->depth,
1566                                                           zs->stencil,
1567                                                           0, NULL);
1568       }
1569 
1570       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1571                                                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1572                                                          layout,
1573                                                          DZN_QUEUE_TRANSITION_FLUSH);
1574    }
1575 }
1576 
1577 static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyBufferToImageInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)1578 dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
1579                                    const VkCopyBufferToImageInfo2 *info,
1580                                    uint32_t r,
1581                                    VkImageAspectFlagBits aspect,
1582                                    uint32_t l)
1583 {
1584    VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
1585    VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
1586 
1587    ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1588 
1589    VkBufferImageCopy2 region = info->pRegions[r];
1590    enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
1591    uint32_t blkh = util_format_get_blockheight(pfmt);
1592    uint32_t blkd = util_format_get_blockdepth(pfmt);
1593 
1594    /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
1595     * to not be block aligned if it's reaching the image boundary, offsets still
1596     * have to be aligned. Align the image extent to make D3D12 happy.
1597     */
1598    dzn_image_align_extent(dst_image, &region.imageExtent);
1599 
1600    D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
1601       dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
1602    D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
1603       dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);
1604 
1605    if (dzn_buffer_supports_region_copy(&src_buf_loc)) {
1606       /* RowPitch and Offset are properly aligned, we can copy
1607        * the whole thing in one call.
1608        */
1609       D3D12_BOX src_box = {
1610          .left = 0,
1611          .top = 0,
1612          .front = 0,
1613          .right = region.imageExtent.width,
1614          .bottom = region.imageExtent.height,
1615          .back = region.imageExtent.depth,
1616       };
1617 
1618       ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
1619                                                    region.imageOffset.x,
1620                                                    region.imageOffset.y,
1621                                                    region.imageOffset.z,
1622                                                    &src_buf_loc, &src_box);
1623       return;
1624    }
1625 
1626    /* Copy line-by-line if things are not properly aligned. */
1627    D3D12_BOX src_box = {
1628       .top = 0,
1629       .front = 0,
1630       .bottom = blkh,
1631       .back = blkd,
1632    };
1633 
1634    for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
1635       for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
1636          uint32_t src_x;
1637 
1638          D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
1639             dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
1640                                          &region, &src_buf_loc,
1641                                          y, z, &src_x);
1642 
1643          src_box.left = src_x;
1644          src_box.right = src_x + region.imageExtent.width;
1645          ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
1646                                                       &dst_img_loc,
1647                                                       region.imageOffset.x,
1648                                                       region.imageOffset.y + y,
1649                                                       region.imageOffset.z + z,
1650                                                       &src_buf_line_loc,
1651                                                       &src_box);
1652       }
1653    }
1654 }
1655 
1656 static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyImageToBufferInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)1657 dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
1658                                    const VkCopyImageToBufferInfo2 *info,
1659                                    uint32_t r,
1660                                    VkImageAspectFlagBits aspect,
1661                                    uint32_t l)
1662 {
1663    VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
1664    VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
1665 
1666    ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1667 
1668    VkBufferImageCopy2 region = info->pRegions[r];
1669    enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
1670    uint32_t blkh = util_format_get_blockheight(pfmt);
1671    uint32_t blkd = util_format_get_blockdepth(pfmt);
1672 
1673    /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
1674     * to not be block aligned if it's reaching the image boundary, offsets still
1675     * have to be aligned. Align the image extent to make D3D12 happy.
1676     */
1677    dzn_image_align_extent(src_image, &region.imageExtent);
1678 
1679    D3D12_TEXTURE_COPY_LOCATION src_img_loc =
1680       dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
1681    D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
1682       dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);
1683 
1684    if (dzn_buffer_supports_region_copy(&dst_buf_loc)) {
1685       /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
1686        * the whole thing in one call.
1687        */
1688       D3D12_BOX src_box = {
1689          .left = (UINT)region.imageOffset.x,
1690          .top = (UINT)region.imageOffset.y,
1691          .front = (UINT)region.imageOffset.z,
1692          .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
1693          .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
1694          .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
1695       };
1696 
1697       ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
1698                                                    0, 0, 0, &src_img_loc,
1699                                                    &src_box);
1700       return;
1701    }
1702 
1703    D3D12_BOX src_box = {
1704       .left = (UINT)region.imageOffset.x,
1705       .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
1706    };
1707 
1708    /* Copy line-by-line if things are not properly aligned. */
1709    for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
1710       src_box.front = region.imageOffset.z + z;
1711       src_box.back = src_box.front + blkd;
1712 
1713       for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
1714          uint32_t dst_x;
1715 
1716          D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
1717             dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
1718                                          &region, &dst_buf_loc,
1719                                          y, z, &dst_x);
1720 
1721          src_box.top = region.imageOffset.y + y;
1722          src_box.bottom = src_box.top + blkh;
1723 
1724          ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
1725                                                       &dst_buf_line_loc,
1726                                                       dst_x, 0, 0,
1727                                                       &src_img_loc,
1728                                                       &src_box);
1729       }
1730    }
1731 }
1732 
1733 static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer * cmdbuf,const VkCopyImageInfo2 * info,D3D12_RESOURCE_DESC * tmp_desc,D3D12_TEXTURE_COPY_LOCATION * tmp_loc,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)1734 dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
1735                               const VkCopyImageInfo2 *info,
1736                               D3D12_RESOURCE_DESC *tmp_desc,
1737                               D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
1738                               uint32_t r,
1739                               VkImageAspectFlagBits aspect,
1740                               uint32_t l)
1741 {
1742    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1743    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
1744    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
1745 
1746    ID3D12Device2 *dev = device->dev;
1747    ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1748 
1749    VkImageCopy2 region = info->pRegions[r];
1750    dzn_image_align_extent(src, &region.extent);
1751 
1752    const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
1753    const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
1754    VkFormat src_format =
1755       dzn_image_get_plane_format(src->vk.format, aspect);
1756    VkFormat dst_format =
1757       dzn_image_get_plane_format(dst->vk.format, aspect);
1758 
1759    enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
1760    uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
1761    uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
1762    uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
1763    enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
1764    uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
1765    uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
1766    uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
1767    uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
1768    uint32_t depth = region.extent.depth;
1769    uint32_t dst_l = l, src_l = l;
1770 
1771    assert(src_subres->aspectMask == dst_subres->aspectMask);
1772 
1773    if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
1774        dst->vk.image_type == VK_IMAGE_TYPE_2D) {
1775       assert(src_subres->layerCount == 1);
1776       src_l = 0;
1777       src_z += l;
1778       depth = 1;
1779    } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
1780               dst->vk.image_type == VK_IMAGE_TYPE_3D) {
1781       assert(dst_subres->layerCount == 1);
1782       dst_l = 0;
1783       dst_z += l;
1784       depth = 1;
1785    } else {
1786       assert(src_subres->layerCount == dst_subres->layerCount);
1787    }
1788 
1789    D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
1790    D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);
1791 
1792    D3D12_BOX src_box = {
1793       .left = (UINT)MAX2(region.srcOffset.x, 0),
1794       .top = (UINT)MAX2(region.srcOffset.y, 0),
1795       .front = (UINT)MAX2(src_z, 0),
1796       .right = (UINT)region.srcOffset.x + region.extent.width,
1797       .bottom = (UINT)region.srcOffset.y + region.extent.height,
1798       .back = (UINT)src_z + depth,
1799    };
1800 
1801    if (!tmp_loc->pResource) {
1802       ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
1803                                                    region.dstOffset.x,
1804                                                    region.dstOffset.y,
1805                                                    dst_z, &src_loc,
1806                                                    &src_box);
1807       return;
1808    }
1809 
1810    tmp_desc->Format =
1811       dzn_image_get_placed_footprint_format(src->vk.format, aspect);
1812    tmp_desc->Width = region.extent.width;
1813    tmp_desc->Height = region.extent.height;
1814 
1815    ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
1816                                        0, 1, 0,
1817                                        &tmp_loc->PlacedFootprint,
1818                                        NULL, NULL, NULL);
1819 
1820    tmp_loc->PlacedFootprint.Footprint.Depth = depth;
1821 
1822    if (r > 0 || l > 0) {
1823       dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
1824                                                D3D12_RESOURCE_STATE_COPY_SOURCE,
1825                                                D3D12_RESOURCE_STATE_COPY_DEST,
1826                                                DZN_QUEUE_TRANSITION_FLUSH);
1827    }
1828 
1829    ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);
1830 
1831    if (r > 0 || l > 0) {
1832       dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
1833                                                D3D12_RESOURCE_STATE_COPY_DEST,
1834                                                D3D12_RESOURCE_STATE_COPY_SOURCE,
1835                                                DZN_QUEUE_TRANSITION_FLUSH);
1836    }
1837 
1838    tmp_desc->Format =
1839       dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
1840    if (src_blkw != dst_blkw)
1841       tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
1842    if (src_blkh != dst_blkh)
1843       tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;
1844 
1845    ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
1846                                        0, 1, 0,
1847                                        &tmp_loc->PlacedFootprint,
1848                                        NULL, NULL, NULL);
1849 
1850    if (src_blkd != dst_blkd) {
1851       tmp_loc->PlacedFootprint.Footprint.Depth =
1852          DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
1853    } else {
1854       tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
1855    }
1856 
1857    D3D12_BOX tmp_box = {
1858       .left = 0,
1859       .top = 0,
1860       .front = 0,
1861       .right = tmp_loc->PlacedFootprint.Footprint.Width,
1862       .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
1863       .back = tmp_loc->PlacedFootprint.Footprint.Depth,
1864    };
1865 
1866    ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
1867                                                 region.dstOffset.x,
1868                                                 region.dstOffset.y,
1869                                                 dst_z,
1870                                                 tmp_loc, &tmp_box);
1871 }
1872 
1873 static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer * cmdbuf,VkImage image,VkImageAspectFlagBits aspect,const VkImageSubresourceLayers * subres,struct dzn_descriptor_heap * heap,uint32_t heap_slot)1874 dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
1875                                      VkImage image,
1876                                      VkImageAspectFlagBits aspect,
1877                                      const VkImageSubresourceLayers *subres,
1878                                      struct dzn_descriptor_heap *heap,
1879                                      uint32_t heap_slot)
1880 {
1881    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1882    VK_FROM_HANDLE(dzn_image, img, image);
1883    VkImageViewCreateInfo iview_info = {
1884       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1885       .image = image,
1886       .format = img->vk.format,
1887       .subresourceRange = {
1888          .aspectMask = (VkImageAspectFlags)aspect,
1889          .baseMipLevel = subres->mipLevel,
1890          .levelCount = 1,
1891          .baseArrayLayer = subres->baseArrayLayer,
1892          .layerCount = subres->layerCount,
1893       },
1894    };
1895 
1896    if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
1897       iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
1898       iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
1899       iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
1900       iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
1901    } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
1902       iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
1903       iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
1904       iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
1905       iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
1906    }
1907 
1908    switch (img->vk.image_type) {
1909    case VK_IMAGE_TYPE_1D:
1910       iview_info.viewType = img->vk.array_layers > 1 ?
1911                             VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
1912       break;
1913    case VK_IMAGE_TYPE_2D:
1914       iview_info.viewType = img->vk.array_layers > 1 ?
1915                             VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
1916       break;
1917    case VK_IMAGE_TYPE_3D:
1918       iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
1919       break;
1920    default:
1921       unreachable("Invalid type");
1922    }
1923 
1924    struct dzn_image_view iview;
1925    dzn_image_view_init(device, &iview, &iview_info);
1926    dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview);
1927    dzn_image_view_finish(&iview);
1928 
1929    D3D12_GPU_DESCRIPTOR_HANDLE handle =
1930       dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
1931    ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
1932 }
1933 
1934 static void
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer * cmdbuf,struct dzn_image * img,VkImageAspectFlagBits aspect,uint32_t level,uint32_t layer)1935 dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
1936                                      struct dzn_image *img,
1937                                      VkImageAspectFlagBits aspect,
1938                                      uint32_t level, uint32_t layer)
1939 {
1940    bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
1941    VkImageSubresourceRange range = {
1942       .aspectMask = (VkImageAspectFlags)aspect,
1943       .baseMipLevel = level,
1944       .levelCount = 1,
1945       .baseArrayLayer = layer,
1946       .layerCount = 1,
1947    };
1948 
1949    if (ds) {
1950       D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
1951       D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
1952       ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, TRUE, &handle);
1953    } else {
1954       D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
1955       D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
1956       ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, FALSE, NULL);
1957    }
1958 }
1959 
1960 static void
dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const struct dzn_image * dst,VkImageAspectFlagBits aspect,VkFilter filter,bool resolve)1961 dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
1962                                  const struct dzn_image *src,
1963                                  const struct dzn_image *dst,
1964                                  VkImageAspectFlagBits aspect,
1965                                  VkFilter filter, bool resolve)
1966 {
1967    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1968    enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
1969    VkImageUsageFlags usage =
1970       vk_format_is_depth_or_stencil(dst->vk.format) ?
1971       VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
1972       VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
1973    struct dzn_meta_blit_key ctx_key = {
1974       .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect),
1975       .samples = (uint32_t)src->vk.samples,
1976       .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
1977                         FRAG_RESULT_DEPTH :
1978                         aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
1979                         FRAG_RESULT_STENCIL :
1980                         FRAG_RESULT_DATA0),
1981       .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
1982                              util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
1983                              aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
1984                              GLSL_TYPE_FLOAT),
1985       .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
1986                                 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
1987                                 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
1988                                 GLSL_SAMPLER_DIM_3D),
1989       .src_is_array = src->vk.array_layers > 1,
1990       .resolve = resolve,
1991       .linear_filter = filter == VK_FILTER_LINEAR,
1992       .padding = 0,
1993    };
1994 
1995    const struct dzn_meta_blit *ctx =
1996       dzn_meta_blits_get_context(device, &ctx_key);
1997    assert(ctx);
1998 
1999    ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
2000    ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
2001 }
2002 
2003 static void
dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const VkImageSubresourceLayers * src_subres,const VkOffset3D * src_offsets,const struct dzn_image * dst,const VkImageSubresourceLayers * dst_subres,const VkOffset3D * dst_offsets,bool normalize_src_coords)2004 dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
2005                                   const struct dzn_image *src,
2006                                   const VkImageSubresourceLayers *src_subres,
2007                                   const VkOffset3D *src_offsets,
2008                                   const struct dzn_image *dst,
2009                                   const VkImageSubresourceLayers *dst_subres,
2010                                   const VkOffset3D *dst_offsets,
2011                                   bool normalize_src_coords)
2012 {
2013    uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
2014    uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
2015    uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
2016    uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
2017 
2018    float dst_pos[4] = {
2019       (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
2020       (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
2021    };
2022 
2023    float src_pos[4] = {
2024       (float)src_offsets[0].x, (float)src_offsets[0].y,
2025       (float)src_offsets[1].x, (float)src_offsets[1].y,
2026    };
2027 
2028    if (normalize_src_coords) {
2029       src_pos[0] /= src_w;
2030       src_pos[1] /= src_h;
2031       src_pos[2] /= src_w;
2032       src_pos[3] /= src_h;
2033    }
2034 
2035    float coords[] = {
2036       dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
2037       dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
2038       dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
2039       dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
2040    };
2041 
2042    ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);
2043 
2044    D3D12_VIEWPORT vp = {
2045       .TopLeftX = 0,
2046       .TopLeftY = 0,
2047       .Width = (float)dst_w,
2048       .Height = (float)dst_h,
2049       .MinDepth = 0,
2050       .MaxDepth = 1,
2051    };
2052    ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);
2053 
2054    D3D12_RECT scissor = {
2055       .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
2056       .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
2057       .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
2058       .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
2059    };
2060    ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
2061 }
2062 
2063 static void
dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer * cmdbuf,struct dzn_image * src,VkImageLayout src_layout,const VkImageSubresourceLayers * src_subres,struct dzn_image * dst,VkImageLayout dst_layout,const VkImageSubresourceLayers * dst_subres,VkImageAspectFlagBits aspect,bool post)2064 dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
2065                                    struct dzn_image *src, VkImageLayout src_layout,
2066                                    const VkImageSubresourceLayers *src_subres,
2067                                    struct dzn_image *dst, VkImageLayout dst_layout,
2068                                    const VkImageSubresourceLayers *dst_subres,
2069                                    VkImageAspectFlagBits aspect,
2070                                    bool post)
2071 {
2072    VkImageSubresourceRange src_range = {
2073       .aspectMask = src_subres->aspectMask,
2074       .baseMipLevel = src_subres->mipLevel,
2075       .levelCount = 1,
2076       .baseArrayLayer = src_subres->baseArrayLayer,
2077       .layerCount = src_subres->layerCount,
2078    };
2079    VkImageSubresourceRange dst_range = {
2080       .aspectMask = dst_subres->aspectMask,
2081       .baseMipLevel = dst_subres->mipLevel,
2082       .levelCount = 1,
2083       .baseArrayLayer = dst_subres->baseArrayLayer,
2084       .layerCount = dst_subres->layerCount,
2085    };
2086 
2087    if (!post) {
2088       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2089                                                          src_layout,
2090                                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2091                                                          DZN_QUEUE_TRANSITION_FLUSH);
2092       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2093                                                          dst_layout,
2094                                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2095                                                          DZN_QUEUE_TRANSITION_FLUSH);
2096    } else {
2097       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2098                                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2099                                                          src_layout,
2100                                                          DZN_QUEUE_TRANSITION_FLUSH);
2101       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2102                                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2103                                                          dst_layout,
2104                                                          DZN_QUEUE_TRANSITION_FLUSH);
2105    }
2106 }
2107 
2108 static void
dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer * cmdbuf,const VkBlitImageInfo2 * info,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,uint32_t r)2109 dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
2110                            const VkBlitImageInfo2 *info,
2111                            struct dzn_descriptor_heap *heap,
2112                            uint32_t *heap_slot,
2113                            uint32_t r)
2114 {
2115    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2116    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2117 
2118    const VkImageBlit2 *region = &info->pRegions[r];
2119    bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
2120    bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
2121 
2122    dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2123       dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false);
2124       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2125                                          src, info->srcImageLayout, &region->srcSubresource,
2126                                          dst, info->dstImageLayout, &region->dstSubresource,
2127                                          aspect, false);
2128       dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
2129                                            aspect, &region->srcSubresource,
2130                                            heap, (*heap_slot)++);
2131       dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
2132                                         src, &region->srcSubresource, region->srcOffsets,
2133                                         dst, &region->dstSubresource, region->dstOffsets,
2134                                         src->vk.samples == 1);
2135 
2136       uint32_t dst_depth =
2137          region->dstOffsets[1].z > region->dstOffsets[0].z ?
2138          region->dstOffsets[1].z - region->dstOffsets[0].z :
2139          region->dstOffsets[0].z - region->dstOffsets[1].z;
2140       uint32_t src_depth =
2141          region->srcOffsets[1].z > region->srcOffsets[0].z ?
2142          region->srcOffsets[1].z - region->srcOffsets[0].z :
2143          region->srcOffsets[0].z - region->srcOffsets[1].z;
2144 
2145       uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
2146       uint32_t dst_level = region->dstSubresource.mipLevel;
2147 
2148       float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
2149       if (region->srcOffsets[0].z > region->srcOffsets[1].z)
2150          src_slice_step = -src_slice_step;
2151       float src_z_coord =
2152          src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
2153       uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
2154       uint32_t dst_z_coord =
2155          dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
2156       if (region->dstOffsets[0].z > region->dstOffsets[1].z)
2157          dst_z_coord--;
2158 
2159       uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
2160                                 1 : -1;
2161 
2162       /* Normalize the src coordinates/step */
2163       if (src_is_3d) {
2164          src_z_coord /= src->vk.extent.depth;
2165          src_slice_step /= src->vk.extent.depth;
2166       }
2167 
2168       for (uint32_t slice = 0; slice < slice_count; slice++) {
2169          dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
2170          ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
2171          ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
2172          src_z_coord += src_slice_step;
2173          dst_z_coord += dst_slice_step;
2174       }
2175 
2176       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2177                                          src, info->srcImageLayout, &region->srcSubresource,
2178                                          dst, info->dstImageLayout, &region->dstSubresource,
2179                                          aspect, true);
2180    }
2181 }
2182 
2183 static void
dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer * cmdbuf,const VkResolveImageInfo2 * info,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,uint32_t r)2184 dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
2185                               const VkResolveImageInfo2 *info,
2186                               struct dzn_descriptor_heap *heap,
2187                               uint32_t *heap_slot,
2188                               uint32_t r)
2189 {
2190    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2191    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2192 
2193    const VkImageResolve2 *region = &info->pRegions[r];
2194 
2195    dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2196       dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true);
2197       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2198                                          src, info->srcImageLayout, &region->srcSubresource,
2199                                          dst, info->dstImageLayout, &region->dstSubresource,
2200                                          aspect, false);
2201       dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
2202                                            &region->srcSubresource,
2203                                            heap, (*heap_slot)++);
2204 
2205       VkOffset3D src_offset[2] = {
2206          {
2207             .x = region->srcOffset.x,
2208             .y = region->srcOffset.y,
2209          },
2210          {
2211             .x = (int32_t)(region->srcOffset.x + region->extent.width),
2212             .y = (int32_t)(region->srcOffset.y + region->extent.height),
2213          },
2214       };
2215       VkOffset3D dst_offset[2] = {
2216          {
2217             .x = region->dstOffset.x,
2218             .y = region->dstOffset.y,
2219          },
2220          {
2221             .x = (int32_t)(region->dstOffset.x + region->extent.width),
2222             .y = (int32_t)(region->dstOffset.y + region->extent.height),
2223          },
2224       };
2225 
2226       dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
2227                                         src, &region->srcSubresource, src_offset,
2228                                         dst, &region->dstSubresource, dst_offset,
2229                                         false);
2230 
2231       uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
2232       for (uint32_t layer = 0; layer < layer_count; layer++) {
2233          float src_z_coord = layer;
2234 
2235          dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
2236                                               dst, aspect, region->dstSubresource.mipLevel,
2237                                               region->dstSubresource.baseArrayLayer + layer);
2238          ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
2239          ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
2240       }
2241 
2242       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2243                                          src, info->srcImageLayout, &region->srcSubresource,
2244                                          dst, info->dstImageLayout, &region->dstSubresource,
2245                                          aspect, true);
2246    }
2247 }
2248 
2249 static void
dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)2250 dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2251 {
2252    const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
2253 
2254    if (!pipeline)
2255       return;
2256 
2257    ID3D12PipelineState *old_pipeline_state =
2258       cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;
2259 
2260    if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
2261       if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2262          struct dzn_graphics_pipeline *gfx =
2263             (struct dzn_graphics_pipeline *)pipeline;
2264          ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
2265          ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
2266          dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
2267       } else {
2268          ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
2269       }
2270    }
2271 
2272    ID3D12PipelineState *new_pipeline_state = pipeline->state;
2273 
2274    if (old_pipeline_state != new_pipeline_state) {
2275       ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
2276       cmdbuf->state.pipeline = pipeline;
2277    }
2278 }
2279 
2280 static void
dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)2281 dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2282 {
2283    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2284    struct dzn_descriptor_state *desc_state =
2285       &cmdbuf->state.bindpoint[bindpoint].desc_state;
2286    struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
2287       desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
2288       desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
2289    };
2290    uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
2291    bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
2292    const struct dzn_pipeline *pipeline =
2293       cmdbuf->state.bindpoint[bindpoint].pipeline;
2294 
2295    if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS))
2296       goto set_heaps;
2297 
2298    dzn_foreach_pool_type (type) {
2299       uint32_t desc_count = pipeline->desc_count[type];
2300       if (!desc_count)
2301          continue;
2302 
2303       struct dzn_descriptor_heap_pool *pool =
2304          type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
2305          &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
2306       struct dzn_descriptor_heap *dst_heap = NULL;
2307       uint32_t dst_heap_offset = 0;
2308 
2309       dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
2310                                            &dst_heap, &dst_heap_offset);
2311       new_heap_offsets[type] = dst_heap_offset;
2312       update_root_desc_table[type] = true;
2313 
2314       for (uint32_t s = 0; s < MAX_SETS; s++) {
2315          const struct dzn_descriptor_set *set = desc_state->sets[s].set;
2316          if (!set) continue;
2317 
2318          uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
2319          uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type];
2320          if (set_desc_count) {
2321             mtx_lock(&set->pool->defragment_lock);
2322             dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset,
2323                                      &set->pool->heaps[type], set->heap_offsets[type],
2324                                      set_desc_count);
2325             mtx_unlock(&set->pool->defragment_lock);
2326          }
2327 
2328          if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
2329             uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
2330             for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
2331                uint32_t desc_heap_offset =
2332                   pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv;
2333                struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
2334                bdesc.offset += desc_state->sets[s].dynamic_offsets[o];
2335 
2336                dzn_descriptor_heap_write_buffer_desc(dst_heap,
2337                                                      dst_heap_offset + set_heap_offset + desc_heap_offset,
2338                                                      false, &bdesc);
2339 
2340                if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) {
2341                   desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav;
2342                   dzn_descriptor_heap_write_buffer_desc(dst_heap,
2343                                                         dst_heap_offset + set_heap_offset + desc_heap_offset,
2344                                                         true, &bdesc);
2345                }
2346             }
2347          }
2348       }
2349 
2350       new_heaps[type] = dst_heap;
2351    }
2352 
2353 set_heaps:
2354    if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
2355        new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
2356       ID3D12DescriptorHeap *desc_heaps[2];
2357       uint32_t num_desc_heaps = 0;
2358       if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
2359          desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
2360       if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
2361          desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
2362       ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);
2363 
2364       for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
2365          cmdbuf->state.heaps[h] = new_heaps[h];
2366    }
2367 
2368    for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
2369       D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];
2370 
2371       if (!update_root_desc_table[type])
2372          continue;
2373 
2374       D3D12_GPU_DESCRIPTOR_HANDLE handle =
2375          dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);
2376 
2377       if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
2378          ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
2379       else
2380          ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
2381    }
2382 }
2383 
2384 static void
dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)2385 dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2386 {
2387    if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
2388       return;
2389 
2390    const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
2391    uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
2392 
2393    if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2394       ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
2395                                                      sizeof(cmdbuf->state.sysvals.gfx) / 4,
2396                                                      &cmdbuf->state.sysvals.gfx, 0);
2397    } else {
2398       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
2399                                                     sizeof(cmdbuf->state.sysvals.compute) / 4,
2400                                                     &cmdbuf->state.sysvals.compute, 0);
2401    }
2402 }
2403 
2404 static void
dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer * cmdbuf)2405 dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
2406 {
2407    const struct dzn_graphics_pipeline *pipeline =
2408       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
2409 
2410    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
2411        !pipeline->vp.count)
2412       return;
2413 
2414    ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
2415 }
2416 
2417 static void
dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer * cmdbuf)2418 dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
2419 {
2420    const struct dzn_graphics_pipeline *pipeline =
2421       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
2422 
2423    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
2424       return;
2425 
2426    if (!pipeline->scissor.count) {
2427       /* Apply a scissor delimiting the render area. */
2428       ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
2429       return;
2430    }
2431 
2432    D3D12_RECT scissors[MAX_SCISSOR];
2433 
2434    memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
2435    for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
2436       scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
2437       scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
2438       scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
2439       scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
2440    }
2441 
2442    ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
2443 }
2444 
2445 static void
dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer * cmdbuf)2446 dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
2447 {
2448    unsigned start, end;
2449 
2450    BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
2451       ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, cmdbuf->state.vb.views);
2452 
2453    BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
2454 }
2455 
2456 static void
dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer * cmdbuf)2457 dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
2458 {
2459    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
2460       return;
2461 
2462    ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
2463 }
2464 
2465 static void
dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)2466 dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2467 {
2468    struct dzn_cmd_buffer_push_constant_state *state =
2469       bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
2470       &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
2471 
2472    uint32_t offset = state->offset / 4;
2473    uint32_t end = ALIGN(state->end, 4) / 4;
2474    uint32_t count = end - offset;
2475 
2476    if (!count)
2477       return;
2478 
2479    uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
2480    uint32_t *vals = state->values + offset;
2481 
2482    if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
2483       ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
2484    else
2485       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
2486 
2487    state->offset = 0;
2488    state->end = 0;
2489 }
2490 
2491 static void
dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer * cmdbuf)2492 dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
2493 {
2494    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
2495       const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
2496          cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2497       uint32_t ref =
2498          gfx->zsa.stencil_test.front.uses_ref ?
2499          cmdbuf->state.zsa.stencil_test.front.ref :
2500          cmdbuf->state.zsa.stencil_test.back.ref;
2501       ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
2502    }
2503 }
2504 
2505 static void
dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer * cmdbuf)2506 dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
2507 {
2508    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
2509       ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
2510                                                   cmdbuf->state.blend.constants);
2511 }
2512 
2513 static void
dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer * cmdbuf)2514 dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
2515 {
2516    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
2517       ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
2518                                                   cmdbuf->state.zsa.depth_bounds.min,
2519                                                   cmdbuf->state.zsa.depth_bounds.max);
2520    }
2521 }
2522 
2523 static VkResult
dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer * cmdbuf,uint32_t * vertex_count)2524 dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
2525 {
2526    uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
2527    uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
2528 
2529    *vertex_count = triangle_count * 3;
2530    if (!*vertex_count)
2531       return VK_SUCCESS;
2532 
2533    ID3D12Resource *index_buf;
2534    VkResult result =
2535       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
2536                                         D3D12_HEAP_TYPE_UPLOAD,
2537                                         D3D12_RESOURCE_STATE_GENERIC_READ,
2538                                         &index_buf);
2539    if (result != VK_SUCCESS)
2540       return result;
2541 
2542    void *cpu_ptr;
2543    ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);
2544 
2545    /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
2546    if (index_size == 2) {
2547       uint16_t *indices = (uint16_t *)cpu_ptr;
2548       for (uint32_t t = 0; t < triangle_count; t++) {
2549          indices[t * 3] = t + 1;
2550          indices[(t * 3) + 1] = t + 2;
2551          indices[(t * 3) + 2] = 0;
2552       }
2553       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
2554    } else {
2555       uint32_t *indices = (uint32_t *)cpu_ptr;
2556       for (uint32_t t = 0; t < triangle_count; t++) {
2557          indices[t * 3] = t + 1;
2558          indices[(t * 3) + 1] = t + 2;
2559          indices[(t * 3) + 2] = 0;
2560       }
2561       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2562    }
2563 
2564    cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
2565    cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf);
2566    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2567    return VK_SUCCESS;
2568 }
2569 
2570 static VkResult
dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer * cmdbuf,uint32_t * index_count,uint32_t * first_index)2571 dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
2572                                           uint32_t *index_count,
2573                                           uint32_t *first_index)
2574 {
2575    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2576    uint32_t triangle_count = MAX2(*index_count, 2) - 2;
2577 
2578    *index_count = triangle_count * 3;
2579    if (!*index_count)
2580       return VK_SUCCESS;
2581 
2582    /* New index is always 32bit to make the compute shader rewriting the
2583     * index simpler */
2584    ID3D12Resource *new_index_buf;
2585    VkResult result =
2586       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
2587                                         D3D12_HEAP_TYPE_DEFAULT,
2588                                         D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2589                                         &new_index_buf);
2590    if (result != VK_SUCCESS)
2591       return result;
2592 
2593    D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
2594       cmdbuf->state.ib.view.BufferLocation;
2595 
2596    ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
2597       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2598    ASSERTED bool prim_restart =
2599       dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;
2600 
2601    assert(!prim_restart);
2602 
2603    enum dzn_index_type index_type =
2604       dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
2605    const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
2606       &device->triangle_fan[index_type];
2607 
2608    struct dzn_triangle_fan_rewrite_index_params params = {
2609       .first_index = *first_index,
2610    };
2611 
2612    ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
2613    ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
2614    ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
2615    ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
2616                                                  &params, 0);
2617    ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
2618    ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);
2619 
2620    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
2621                                             D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2622                                             D3D12_RESOURCE_STATE_INDEX_BUFFER,
2623                                             DZN_QUEUE_TRANSITION_FLUSH);
2624 
2625    /* We don't mess up with the driver state when executing our internal
2626     * compute shader, but we still change the D3D12 state, so let's mark
2627     * things dirty if needed.
2628     */
2629    cmdbuf->state.pipeline = NULL;
2630    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
2631       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
2632          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2633    }
2634 
2635    cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
2636    cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
2637    cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2638    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2639    *first_index = 0;
2640    return VK_SUCCESS;
2641 }
2642 
2643 static void
dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer * cmdbuf,bool indexed)2644 dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
2645 {
2646    if (indexed)
2647       dzn_cmd_buffer_update_ibview(cmdbuf);
2648 
2649    dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2650    dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2651    dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2652    dzn_cmd_buffer_update_viewports(cmdbuf);
2653    dzn_cmd_buffer_update_scissors(cmdbuf);
2654    dzn_cmd_buffer_update_vbviews(cmdbuf);
2655    dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2656    dzn_cmd_buffer_update_zsa(cmdbuf);
2657    dzn_cmd_buffer_update_blend_constants(cmdbuf);
2658    dzn_cmd_buffer_update_depth_bounds(cmdbuf);
2659 
2660    /* Reset the dirty states */
2661    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0;
2662    cmdbuf->state.dirty = 0;
2663 }
2664 
2665 static uint32_t
dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer * cmdbuf,bool indexed)2666 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
2667 {
2668    struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
2669       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2670 
2671    if (!pipeline->ia.triangle_fan)
2672       return 0;
2673 
2674    uint32_t max_triangles;
2675 
2676    if (indexed) {
2677       uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
2678       uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
2679 
2680       max_triangles = MAX2(max_indices, 2) - 2;
2681    } else {
2682       uint32_t max_vertex = 0;
2683       for (uint32_t i = 0; i < pipeline->vb.count; i++) {
2684          max_vertex =
2685             MAX2(max_vertex,
2686                  cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
2687       }
2688 
2689       max_triangles = MAX2(max_vertex, 2) - 2;
2690    }
2691 
2692    return max_triangles * 3;
2693 }
2694 
2695 static void
dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * draw_buf,size_t draw_buf_offset,ID3D12Resource * count_buf,size_t count_buf_offset,uint32_t max_draw_count,uint32_t draw_buf_stride,bool indexed)2696 dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
2697                              ID3D12Resource *draw_buf,
2698                              size_t draw_buf_offset,
2699                              ID3D12Resource *count_buf,
2700                              size_t count_buf_offset,
2701                              uint32_t max_draw_count,
2702                              uint32_t draw_buf_stride,
2703                              bool indexed)
2704 {
2705    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2706    struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
2707       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2708    uint32_t min_draw_buf_stride =
2709       indexed ?
2710       sizeof(struct dzn_indirect_indexed_draw_params) :
2711       sizeof(struct dzn_indirect_draw_params);
2712    bool prim_restart =
2713       dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;
2714 
2715    draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
2716    assert(draw_buf_stride >= min_draw_buf_stride);
2717    assert((draw_buf_stride & 3) == 0);
2718 
2719    uint32_t triangle_fan_index_buf_stride =
2720       dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
2721       sizeof(uint32_t);
2722    uint32_t exec_buf_stride =
2723       triangle_fan_index_buf_stride > 0 ?
2724       sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
2725       sizeof(struct dzn_indirect_draw_exec_params);
2726    uint32_t triangle_fan_exec_buf_stride =
2727       sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
2728    uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
2729    uint32_t exec_buf_draw_offset = 0;
2730 
2731    // We reserve the first slot for the draw_count value when indirect count is
2732    // involved.
2733    if (count_buf != NULL) {
2734       exec_buf_size += exec_buf_stride;
2735       exec_buf_draw_offset = exec_buf_stride;
2736    }
2737 
2738    ID3D12Resource *exec_buf;
2739    VkResult result =
2740       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
2741                                         D3D12_HEAP_TYPE_DEFAULT,
2742                                         D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2743                                         &exec_buf);
2744    if (result != VK_SUCCESS)
2745       return;
2746 
2747    D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
2748       ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset;
2749    ID3D12Resource *triangle_fan_index_buf = NULL;
2750    ID3D12Resource *triangle_fan_exec_buf = NULL;
2751 
2752    if (triangle_fan_index_buf_stride) {
2753       result =
2754          dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
2755                                            max_draw_count * triangle_fan_index_buf_stride,
2756                                            D3D12_HEAP_TYPE_DEFAULT,
2757                                            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2758                                            &triangle_fan_index_buf);
2759       if (result != VK_SUCCESS)
2760          return;
2761 
2762       result =
2763          dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
2764                                            max_draw_count * triangle_fan_exec_buf_stride,
2765                                            D3D12_HEAP_TYPE_DEFAULT,
2766                                            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2767                                            &triangle_fan_exec_buf);
2768       if (result != VK_SUCCESS)
2769          return;
2770    }
2771 
2772    struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
2773       .draw_buf_stride = draw_buf_stride,
2774       .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
2775       .triangle_fan_index_buf_start =
2776          triangle_fan_index_buf ?
2777          ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
2778       .exec_buf_start =
2779          prim_restart ?
2780          ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
2781    };
2782    uint32_t params_size;
2783    if (triangle_fan_index_buf_stride > 0 && prim_restart)
2784       params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
2785    else if (triangle_fan_index_buf_stride > 0)
2786       params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
2787    else
2788       params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
2789 
2790    enum dzn_indirect_draw_type draw_type;
2791 
2792    if (indexed && triangle_fan_index_buf_stride > 0) {
2793       if (prim_restart && count_buf)
2794          draw_type =  DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
2795       else if (prim_restart && !count_buf)
2796          draw_type =  DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART;
2797       else if (!prim_restart && count_buf)
2798          draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
2799       else
2800          draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
2801    } else if (!indexed && triangle_fan_index_buf_stride > 0) {
2802       draw_type = count_buf ?
2803                   DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN :
2804                   DZN_INDIRECT_DRAW_TRIANGLE_FAN;
2805    } else if (indexed) {
2806       draw_type = count_buf ?
2807                   DZN_INDIRECT_INDEXED_DRAW_COUNT :
2808                   DZN_INDIRECT_INDEXED_DRAW;
2809    } else {
2810       draw_type = count_buf ? DZN_INDIRECT_DRAW_COUNT : DZN_INDIRECT_DRAW;
2811    }
2812 
2813    struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
2814    uint32_t root_param_idx = 0;
2815 
2816    ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
2817    ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
2818    ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
2819                                                            params_size / 4, (const void *)&params, 0);
2820    ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
2821                                                                draw_buf_gpu);
2822    ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
2823                                                                 ID3D12Resource_GetGPUVirtualAddress(exec_buf));
2824    if (count_buf) {
2825       ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
2826                                                                   root_param_idx++,
2827                                                                   ID3D12Resource_GetGPUVirtualAddress(count_buf) +
2828                                                                   count_buf_offset);
2829    }
2830 
2831    if (triangle_fan_exec_buf) {
2832       ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
2833                                                                    root_param_idx++,
2834                                                                    ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
2835    }
2836 
2837    ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);
2838 
2839    D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
2840 
2841    if (triangle_fan_exec_buf) {
2842       enum dzn_index_type index_type =
2843          indexed ?
2844          dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) :
2845          DZN_NO_INDEX;
2846       struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
2847          &device->triangle_fan[index_type];
2848 
2849       struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };
2850 
2851       assert(rewrite_index->root_sig);
2852       assert(rewrite_index->pipeline_state);
2853       assert(rewrite_index->cmd_sig);
2854 
2855       dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
2856                                                D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2857                                                D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
2858                                                DZN_QUEUE_TRANSITION_FLUSH);
2859 
2860       ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
2861       ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
2862       root_param_idx = 0;
2863       ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
2864                                                                    ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
2865       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
2866                                                               sizeof(rewrite_index_params) / 4,
2867                                                               (const void *)&rewrite_index_params, 0);
2868 
2869       if (indexed) {
2870          ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
2871                                                                      root_param_idx++,
2872                                                                      cmdbuf->state.ib.view.BufferLocation);
2873       }
2874 
2875       ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
2876                                                  max_draw_count, triangle_fan_exec_buf, 0,
2877                                                  count_buf ? exec_buf : NULL, 0);
2878 
2879       dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
2880                                                D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2881                                                D3D12_RESOURCE_STATE_INDEX_BUFFER,
2882                                                DZN_QUEUE_TRANSITION_FLUSH);
2883 
2884       /* After our triangle-fan lowering the draw is indexed */
2885       indexed = true;
2886       ib_view = cmdbuf->state.ib.view;
2887       cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
2888       cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
2889       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2890       cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2891    }
2892 
2893    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
2894                                             D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2895                                             D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
2896                                             DZN_QUEUE_TRANSITION_FLUSH);
2897 
2898    /* We don't mess up with the driver state when executing our internal
2899     * compute shader, but we still change the D3D12 state, so let's mark
2900     * things dirty if needed.
2901     */
2902    cmdbuf->state.pipeline = NULL;
2903    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
2904       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
2905          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2906    }
2907 
2908    cmdbuf->state.sysvals.gfx.first_vertex = 0;
2909    cmdbuf->state.sysvals.gfx.base_instance = 0;
2910    cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;
2911    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
2912       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
2913 
2914    dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);
2915 
2916    /* Restore the old IB view if we modified it during the triangle fan lowering */
2917    if (ib_view.SizeInBytes) {
2918       cmdbuf->state.ib.view = ib_view;
2919       cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2920    }
2921 
2922    enum dzn_indirect_draw_cmd_sig_type cmd_sig_type =
2923       triangle_fan_index_buf_stride > 0 ?
2924       DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG :
2925       indexed ?
2926       DZN_INDIRECT_INDEXED_DRAW_CMD_SIG :
2927       DZN_INDIRECT_DRAW_CMD_SIG;
2928    ID3D12CommandSignature *cmdsig =
2929       dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type);
2930 
2931    if (!cmdsig) {
2932       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2933       return;
2934    }
2935 
2936    ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
2937                                               max_draw_count,
2938                                               exec_buf, exec_buf_draw_offset,
2939                                               count_buf ? exec_buf : NULL, 0);
2940 }
2941 
2942 static void
dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer * cmdbuf)2943 dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
2944 {
2945    dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2946    dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2947    dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2948    dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2949 
2950    /* Reset the dirty states */
2951    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0;
2952 }
2953 
2954 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,const VkCopyBufferInfo2 * info)2955 dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
2956                    const VkCopyBufferInfo2 *info)
2957 {
2958    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2959    VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
2960    VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
2961 
2962    for (int i = 0; i < info->regionCount; i++) {
2963       const VkBufferCopy2 *region = info->pRegions + i;
2964 
2965       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
2966                                         src_buffer->res, region->srcOffset,
2967                                         region->size);
2968    }
2969 }
2970 
2971 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,const VkCopyBufferToImageInfo2 * info)2972 dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
2973                           const VkCopyBufferToImageInfo2 *info)
2974 {
2975    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2976 
2977    for (int i = 0; i < info->regionCount; i++) {
2978       const VkBufferImageCopy2 *region = info->pRegions + i;
2979 
2980       dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
2981          for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
2982             dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
2983       }
2984    }
2985 }
2986 
2987 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,const VkCopyImageToBufferInfo2 * info)2988 dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
2989                           const VkCopyImageToBufferInfo2 *info)
2990 {
2991    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2992 
2993    for (int i = 0; i < info->regionCount; i++) {
2994       const VkBufferImageCopy2 *region = info->pRegions + i;
2995 
2996       dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
2997          for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
2998             dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
2999       }
3000    }
3001 }
3002 
3003 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * info)3004 dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
3005                   const VkCopyImageInfo2 *info)
3006 {
3007    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3008    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3009    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3010    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3011 
3012    assert(src->vk.samples == dst->vk.samples);
3013 
3014    bool requires_temp_res = false;
3015 
3016    for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
3017       const VkImageCopy2 *region = &info->pRegions[i];
3018 
3019       dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3020          assert(aspect & region->dstSubresource.aspectMask);
3021 
3022          if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
3023                                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
3024              src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
3025              dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
3026             requires_temp_res = true;
3027             break;
3028          }
3029       }
3030    }
3031 
3032    bool use_blit = false;
3033    if (src->vk.samples > 1) {
3034       use_blit = requires_temp_res;
3035 
3036       for (int i = 0; i < info->regionCount; i++) {
3037          const VkImageCopy2 *region = info->pRegions + i;
3038          if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
3039              region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
3040              region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
3041              region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
3042              region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
3043              region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
3044             use_blit = true;
3045       }
3046    }
3047 
3048    if (use_blit) {
3049       /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]() is
3050        * is issued on a transfer queue, but we don't have any better option
3051        * right now...
3052        */
3053       STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
3054 
3055       VkBlitImageInfo2 blit_info = {
3056          .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
3057          .srcImage = info->srcImage,
3058          .srcImageLayout = info->srcImageLayout,
3059          .dstImage = info->dstImage,
3060          .dstImageLayout = info->dstImageLayout,
3061          .regionCount = info->regionCount,
3062          .pRegions = blit_regions,
3063          .filter = VK_FILTER_NEAREST,
3064       };
3065 
3066       for (uint32_t r = 0; r < info->regionCount; r++) {
3067          blit_regions[r] = (VkImageBlit2) {
3068             .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3069             .srcSubresource = info->pRegions[r].srcSubresource,
3070             .srcOffsets = {
3071                 info->pRegions[r].srcOffset,
3072                 info->pRegions[r].srcOffset,
3073             },
3074             .dstSubresource = info->pRegions[r].dstSubresource,
3075             .dstOffsets = {
3076                 info->pRegions[r].dstOffset,
3077                 info->pRegions[r].dstOffset,
3078             },
3079          };
3080 
3081          blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
3082          blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
3083          blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
3084          blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
3085          blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
3086          blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
3087       }
3088 
3089       dzn_CmdBlitImage2(commandBuffer, &blit_info);
3090 
3091       STACK_ARRAY_FINISH(blit_regions);
3092       return;
3093    }
3094 
3095    D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
3096    D3D12_RESOURCE_DESC tmp_desc = {
3097       .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
3098       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
3099       .DepthOrArraySize = 1,
3100       .MipLevels = 1,
3101       .Format = src->desc.Format,
3102       .SampleDesc = { .Count = 1, .Quality = 0 },
3103       .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
3104       .Flags = D3D12_RESOURCE_FLAG_NONE,
3105    };
3106 
3107    if (requires_temp_res) {
3108       ID3D12Device2 *dev = device->dev;
3109       VkImageAspectFlags aspect = 0;
3110       uint64_t max_size = 0;
3111 
3112       if (vk_format_has_depth(src->vk.format))
3113          aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
3114       else if (vk_format_has_stencil(src->vk.format))
3115          aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
3116       else
3117          aspect = VK_IMAGE_ASPECT_COLOR_BIT;
3118 
3119       for (uint32_t i = 0; i < info->regionCount; i++) {
3120          const VkImageCopy2 *region = &info->pRegions[i];
3121          uint64_t region_size = 0;
3122 
3123          tmp_desc.Format =
3124             dzn_image_get_dxgi_format(src->vk.format,
3125                                       VK_IMAGE_USAGE_TRANSFER_DST_BIT,
3126                                       aspect);
3127          tmp_desc.Width = region->extent.width;
3128          tmp_desc.Height = region->extent.height;
3129 
3130          ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
3131                                              0, 1, 0,
3132                                              NULL, NULL, NULL,
3133                                              &region_size);
3134          max_size = MAX2(max_size, region_size * region->extent.depth);
3135       }
3136 
3137       VkResult result =
3138          dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
3139                                            D3D12_HEAP_TYPE_DEFAULT,
3140                                            D3D12_RESOURCE_STATE_COPY_DEST,
3141                                            &tmp_loc.pResource);
3142       if (result != VK_SUCCESS)
3143          return;
3144 
3145       tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
3146    }
3147 
3148    for (int i = 0; i < info->regionCount; i++) {
3149       const VkImageCopy2 *region = &info->pRegions[i];
3150 
3151       dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3152          for (uint32_t l = 0; l < region->srcSubresource.layerCount; l++)
3153             dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
3154       }
3155    }
3156 }
3157 
3158 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,const VkBlitImageInfo2 * info)3159 dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
3160                   const VkBlitImageInfo2 *info)
3161 {
3162    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3163    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3164 
3165    if (info->regionCount == 0)
3166       return;
3167 
3168    uint32_t desc_count = 0;
3169    for (uint32_t r = 0; r < info->regionCount; r++)
3170       desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
3171 
3172    struct dzn_descriptor_heap *heap;
3173    uint32_t heap_slot;
3174    VkResult result =
3175       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
3176                                            desc_count, &heap, &heap_slot);
3177 
3178    if (result != VK_SUCCESS) {
3179       cmdbuf->error = result;
3180       return;
3181    }
3182 
3183    if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
3184       ID3D12DescriptorHeap * const heaps[] = { heap->heap };
3185       cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
3186       ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
3187    }
3188 
3189    ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
3190 
3191    for (uint32_t r = 0; r < info->regionCount; r++)
3192       dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
3193 
3194    cmdbuf->state.pipeline = NULL;
3195    cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
3196    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
3197       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3198          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3199    }
3200 }
3201 
3202 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,const VkResolveImageInfo2 * info)3203 dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
3204                      const VkResolveImageInfo2 *info)
3205 {
3206    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3207    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3208 
3209    if (info->regionCount == 0)
3210       return;
3211 
3212    uint32_t desc_count = 0;
3213    for (uint32_t r = 0; r < info->regionCount; r++)
3214       desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
3215 
3216    struct dzn_descriptor_heap *heap;
3217    uint32_t heap_slot;
3218    VkResult result =
3219       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
3220                                            desc_count, &heap, &heap_slot);
3221    if (result != VK_SUCCESS) {
3222       cmdbuf->error = result;
3223       return;
3224    }
3225 
3226    if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
3227       ID3D12DescriptorHeap * const heaps[] = { heap->heap };
3228       cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
3229       ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
3230    }
3231 
3232    ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
3233 
3234    uint32_t heap_offset = 0;
3235    for (uint32_t r = 0; r < info->regionCount; r++)
3236       dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_offset, r);
3237 
3238    cmdbuf->state.pipeline = NULL;
3239    cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
3240    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
3241       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3242          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3243    }
3244 }
3245 
3246 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)3247 dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
3248                        VkImage image,
3249                        VkImageLayout imageLayout,
3250                        const VkClearColorValue *pColor,
3251                        uint32_t rangeCount,
3252                        const VkImageSubresourceRange *pRanges)
3253 {
3254    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3255    VK_FROM_HANDLE(dzn_image, img, image);
3256 
3257    dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
3258 }
3259 
3260 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)3261 dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
3262                               VkImage image,
3263                               VkImageLayout imageLayout,
3264                               const VkClearDepthStencilValue *pDepthStencil,
3265                               uint32_t rangeCount,
3266                               const VkImageSubresourceRange *pRanges)
3267 {
3268    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3269    VK_FROM_HANDLE(dzn_image, img, image);
3270 
3271    dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
3272 }
3273 
3274 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatch(VkCommandBuffer commandBuffer,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)3275 dzn_CmdDispatch(VkCommandBuffer commandBuffer,
3276                 uint32_t groupCountX,
3277                 uint32_t groupCountY,
3278                 uint32_t groupCountZ)
3279 {
3280    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3281 
3282    cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
3283    cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
3284    cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
3285    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
3286       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3287 
3288    dzn_cmd_buffer_prepare_dispatch(cmdbuf);
3289    ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
3290 }
3291 
3292 VKAPI_ATTR void VKAPI_CALL
dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)3293 dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
3294                   VkBuffer dstBuffer,
3295                   VkDeviceSize dstOffset,
3296                   VkDeviceSize size,
3297                   uint32_t data)
3298 {
3299    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3300    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
3301 
3302    if (size == VK_WHOLE_SIZE)
3303       size = buf->size - dstOffset;
3304 
3305    size &= ~3ULL;
3306 
3307    ID3D12Resource *src_res;
3308    VkResult result =
3309       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
3310                                         D3D12_HEAP_TYPE_UPLOAD,
3311                                         D3D12_RESOURCE_STATE_GENERIC_READ,
3312                                         &src_res);
3313    if (result != VK_SUCCESS)
3314       return;
3315 
3316    uint32_t *cpu_ptr;
3317    ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
3318    for (uint32_t i = 0; i < size / 4; i++)
3319       cpu_ptr[i] = data;
3320 
3321    ID3D12Resource_Unmap(src_res, 0, NULL);
3322 
3323    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
3324 }
3325 
3326 VKAPI_ATTR void VKAPI_CALL
dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,const void * data)3327 dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
3328                     VkBuffer dstBuffer,
3329                     VkDeviceSize dstOffset,
3330                     VkDeviceSize size,
3331                     const void *data)
3332 {
3333    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3334    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
3335 
3336    if (size == VK_WHOLE_SIZE)
3337       size = buf->size - dstOffset;
3338 
3339    /*
3340     * The spec says:
3341     *   4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
3342     *   buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
3343     *   is not a multiple of 4, then the nearest smaller multiple is used."
3344     */
3345    size &= ~3ULL;
3346 
3347    ID3D12Resource *src_res;
3348    VkResult result =
3349       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
3350                                         D3D12_HEAP_TYPE_UPLOAD,
3351                                         D3D12_RESOURCE_STATE_GENERIC_READ,
3352                                         &src_res);
3353    if (result != VK_SUCCESS)
3354       return;
3355 
3356    void *cpu_ptr;
3357    ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
3358    memcpy(cpu_ptr, data, size),
3359    ID3D12Resource_Unmap(src_res, 0, NULL);
3360 
3361    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
3362 }
3363 
3364 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)3365 dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
3366                         uint32_t attachmentCount,
3367                         const VkClearAttachment *pAttachments,
3368                         uint32_t rectCount,
3369                         const VkClearRect *pRects)
3370 {
3371    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3372 
3373    for (unsigned i = 0; i < attachmentCount; i++) {
3374       VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
3375       struct dzn_image_view *view = NULL;
3376 
3377       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
3378          assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
3379          view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
3380          layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
3381       } else {
3382          if (cmdbuf->state.render.attachments.depth.iview &&
3383              (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
3384             view = cmdbuf->state.render.attachments.depth.iview;
3385             layout = cmdbuf->state.render.attachments.depth.layout;
3386          }
3387 
3388          if (cmdbuf->state.render.attachments.stencil.iview &&
3389              (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
3390             assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
3391             view = cmdbuf->state.render.attachments.stencil.iview;
3392             layout = cmdbuf->state.render.attachments.stencil.layout;
3393          }
3394       }
3395 
3396       if (!view)
3397          continue;
3398 
3399       for (uint32_t j = 0; j < rectCount; j++) {
3400          D3D12_RECT rect;
3401 
3402          dzn_translate_rect(&rect, &pRects[j].rect);
3403          dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
3404                                          &pAttachments[i].clearValue,
3405                                          pAttachments[i].aspectMask,
3406                                          pRects[j].baseArrayLayer,
3407                                          pRects[j].layerCount,
3408                                          1, &rect);
3409       }
3410    }
3411 }
3412 
3413 static void
dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer * cmdbuf,const struct dzn_rendering_attachment * att,VkImageAspectFlagBits aspect)3414 dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
3415                                             const struct dzn_rendering_attachment *att,
3416                                             VkImageAspectFlagBits aspect)
3417 {
3418    struct dzn_image_view *src = att->iview;
3419    struct dzn_image_view *dst = att->resolve.iview;
3420 
3421    if (!src || !dst)
3422       return;
3423 
3424    VkImageLayout src_layout = att->layout;
3425    VkImageLayout dst_layout = att->resolve.layout;
3426    struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
3427    D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect);
3428    struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);
3429    D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect);
3430 
3431    VkImageSubresourceRange src_range = {
3432       .aspectMask = (VkImageAspectFlags)aspect,
3433       .baseMipLevel = src->vk.base_mip_level,
3434       .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
3435       .baseArrayLayer = src->vk.base_array_layer,
3436       .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
3437    };
3438 
3439    VkImageSubresourceRange dst_range = {
3440       .aspectMask = (VkImageAspectFlags)aspect,
3441       .baseMipLevel = dst->vk.base_mip_level,
3442       .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
3443       .baseArrayLayer = dst->vk.base_array_layer,
3444       .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
3445    };
3446 
3447    dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
3448                                                      src_state,
3449                                                      D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
3450                                                      DZN_QUEUE_TRANSITION_FLUSH);
3451    dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
3452                                                      dst_state,
3453                                                      D3D12_RESOURCE_STATE_RESOLVE_DEST,
3454                                                      DZN_QUEUE_TRANSITION_FLUSH);
3455 
3456    for (uint32_t level = 0; level < src_range.levelCount; level++) {
3457       for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
3458          uint32_t src_subres =
3459             dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
3460          uint32_t dst_subres =
3461             dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);
3462 
3463          ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
3464                                                        dst_img->res, dst_subres,
3465                                                        src_img->res, src_subres,
3466                                                        dst->srv_desc.Format);
3467       }
3468    }
3469 
3470    dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
3471                                                      D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
3472                                                      src_state,
3473                                                      DZN_QUEUE_TRANSITION_FLUSH);
3474    dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
3475                                                      D3D12_RESOURCE_STATE_RESOLVE_DEST,
3476                                                      dst_state,
3477                                                      DZN_QUEUE_TRANSITION_FLUSH);
3478 }
3479 
3480 static void
dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer * cmdbuf,const VkRenderingAttachmentInfo * att,VkImageAspectFlagBits aspect)3481 dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
3482                                             const VkRenderingAttachmentInfo *att,
3483                                             VkImageAspectFlagBits aspect)
3484 {
3485    const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
3486       vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
3487    VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3488 
3489    if (!initial_layout || !iview)
3490       return;
3491 
3492    struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
3493    const VkImageSubresourceRange range = {
3494       .aspectMask = aspect,
3495       .baseMipLevel = iview->vk.base_mip_level,
3496       .levelCount = iview->vk.level_count,
3497       .baseArrayLayer = iview->vk.base_array_layer,
3498       .layerCount = iview->vk.layer_count,
3499    };
3500 
3501    dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
3502                                                       initial_layout->initialLayout,
3503                                                       att->imageLayout,
3504                                                       DZN_QUEUE_TRANSITION_FLUSH);
3505 }
3506 
3507 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,const VkRenderingInfo * pRenderingInfo)3508 dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
3509                       const VkRenderingInfo *pRenderingInfo)
3510 {
3511    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3512 
3513    D3D12_RECT new_render_area = {
3514       .left = pRenderingInfo->renderArea.offset.x,
3515       .top = pRenderingInfo->renderArea.offset.y,
3516       .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
3517       .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
3518    };
3519 
3520    // The render area has an impact on the scissor state.
3521    if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
3522       cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3523       cmdbuf->state.render.area = new_render_area;
3524    }
3525 
3526    cmdbuf->state.render.flags = pRenderingInfo->flags;
3527    cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
3528    cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;
3529 
3530    D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
3531    D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
3532 
3533    cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
3534    for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
3535       const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
3536       VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3537 
3538       cmdbuf->state.render.attachments.colors[i].iview = iview;
3539       cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
3540       cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
3541       cmdbuf->state.render.attachments.colors[i].resolve.iview =
3542          dzn_image_view_from_handle(att->resolveImageView);
3543       cmdbuf->state.render.attachments.colors[i].resolve.layout =
3544          att->resolveImageLayout;
3545       cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;
3546 
3547       if (!iview) {
3548          rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
3549          continue;
3550       }
3551 
3552       struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
3553       rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
3554       dzn_rendering_attachment_initial_transition(cmdbuf, att,
3555                                                   VK_IMAGE_ASPECT_COLOR_BIT);
3556    }
3557 
3558    if (pRenderingInfo->pDepthAttachment) {
3559       const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
3560 
3561       cmdbuf->state.render.attachments.depth.iview =
3562          dzn_image_view_from_handle(att->imageView);
3563       cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
3564       cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
3565       cmdbuf->state.render.attachments.depth.resolve.iview =
3566          dzn_image_view_from_handle(att->resolveImageView);
3567       cmdbuf->state.render.attachments.depth.resolve.layout =
3568          att->resolveImageLayout;
3569       cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
3570       dzn_rendering_attachment_initial_transition(cmdbuf, att,
3571                                                   VK_IMAGE_ASPECT_DEPTH_BIT);
3572    }
3573 
3574    if (pRenderingInfo->pStencilAttachment) {
3575       const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
3576 
3577       cmdbuf->state.render.attachments.stencil.iview =
3578          dzn_image_view_from_handle(att->imageView);
3579       cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
3580       cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
3581       cmdbuf->state.render.attachments.stencil.resolve.iview =
3582          dzn_image_view_from_handle(att->resolveImageView);
3583       cmdbuf->state.render.attachments.stencil.resolve.layout =
3584          att->resolveImageLayout;
3585       cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
3586       dzn_rendering_attachment_initial_transition(cmdbuf, att,
3587                                                   VK_IMAGE_ASPECT_STENCIL_BIT);
3588    }
3589 
3590    if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
3591       struct dzn_image_view *z_iview =
3592          pRenderingInfo->pDepthAttachment ?
3593          dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
3594          NULL;
3595       struct dzn_image_view *s_iview =
3596          pRenderingInfo->pStencilAttachment ?
3597          dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
3598          NULL;
3599       struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
3600       assert(!z_iview || !s_iview || z_iview == s_iview);
3601 
3602       if (iview) {
3603          struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
3604 
3605          zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
3606       }
3607    }
3608 
3609    ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
3610                                                  pRenderingInfo->colorAttachmentCount,
3611                                                  pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
3612                                                  FALSE, zs_handle.ptr ? &zs_handle : NULL);
3613 
3614    for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
3615       const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
3616       VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3617 
3618       if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3619          dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
3620                                          &att->clearValue,
3621                                          VK_IMAGE_ASPECT_COLOR_BIT, 0,
3622                                          VK_REMAINING_ARRAY_LAYERS, 1,
3623                                          &cmdbuf->state.render.area);
3624       }
3625    }
3626 
3627    if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
3628       const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
3629       const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
3630       struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
3631       struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
3632       struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
3633       VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
3634 
3635       assert(!z_iview || !s_iview || z_iview == s_iview);
3636 
3637       VkImageAspectFlags aspects = 0;
3638       VkClearValue clear_val;
3639 
3640       if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3641          aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
3642          clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
3643          layout = z_att->imageLayout;
3644       }
3645 
3646       if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3647          aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
3648          clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
3649          layout = s_att->imageLayout;
3650       }
3651 
3652       if (aspects != 0) {
3653          dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
3654                                          &clear_val, aspects, 0,
3655                                          VK_REMAINING_ARRAY_LAYERS, 1,
3656                                          &cmdbuf->state.render.area);
3657       }
3658    }
3659 }
3660 
3661 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndRendering(VkCommandBuffer commandBuffer)3662 dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
3663 {
3664    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3665 
3666    for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
3667       dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3668                                                   &cmdbuf->state.render.attachments.colors[i],
3669                                                   VK_IMAGE_ASPECT_COLOR_BIT);
3670    }
3671 
3672    dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3673                                                &cmdbuf->state.render.attachments.depth,
3674                                                VK_IMAGE_ASPECT_DEPTH_BIT);
3675    dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3676                                                &cmdbuf->state.render.attachments.stencil,
3677                                                VK_IMAGE_ASPECT_STENCIL_BIT);
3678 
3679    memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
3680 }
3681 
3682 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipeline pipe)3683 dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
3684                     VkPipelineBindPoint pipelineBindPoint,
3685                     VkPipeline pipe)
3686 {
3687    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3688    VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
3689 
3690    cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
3691    cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3692    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3693       const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;
3694 
3695       if (!gfx->vp.dynamic) {
3696          memcpy(cmdbuf->state.viewports, gfx->vp.desc,
3697                 gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
3698          cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
3699       }
3700 
3701       if (!gfx->scissor.dynamic) {
3702          memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
3703                 gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
3704          cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3705       }
3706 
3707       if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
3708          cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
3709          cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
3710          cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3711       }
3712 
3713       if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
3714          cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
3715          cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
3716          cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
3717       }
3718 
3719       if (!gfx->blend.dynamic_constants) {
3720          memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
3721                 sizeof(cmdbuf->state.blend.constants));
3722          cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
3723       }
3724 
3725       for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
3726          cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
3727 
3728       if (gfx->vb.count > 0)
3729          BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
3730    }
3731 }
3732 
3733 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)3734 dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
3735                           VkPipelineBindPoint pipelineBindPoint,
3736                           VkPipelineLayout layout,
3737                           uint32_t firstSet,
3738                           uint32_t descriptorSetCount,
3739                           const VkDescriptorSet *pDescriptorSets,
3740                           uint32_t dynamicOffsetCount,
3741                           const uint32_t *pDynamicOffsets)
3742 {
3743    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3744    VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
3745 
3746    struct dzn_descriptor_state *desc_state =
3747       &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
3748    uint32_t dirty = 0;
3749 
3750    for (uint32_t i = 0; i < descriptorSetCount; i++) {
3751       uint32_t idx = firstSet + i;
3752       VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
3753 
3754       if (desc_state->sets[idx].set != set) {
3755          desc_state->sets[idx].set = set;
3756          dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3757       }
3758 
3759       uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
3760       if (dynamic_buffer_count) {
3761          assert(dynamicOffsetCount >= dynamic_buffer_count);
3762 
3763          for (uint32_t j = 0; j < dynamic_buffer_count; j++)
3764             desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
3765 
3766          dynamicOffsetCount -= dynamic_buffer_count;
3767          pDynamicOffsets += dynamic_buffer_count;
3768          dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3769       }
3770    }
3771 
3772    cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
3773 }
3774 
3775 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetViewport(VkCommandBuffer commandBuffer,uint32_t firstViewport,uint32_t viewportCount,const VkViewport * pViewports)3776 dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
3777                    uint32_t firstViewport,
3778                    uint32_t viewportCount,
3779                    const VkViewport *pViewports)
3780 {
3781    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3782 
3783    STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
3784 
3785    for (uint32_t i = 0; i < viewportCount; i++) {
3786       uint32_t vp = i + firstViewport;
3787 
3788       dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
3789 
3790       if (pViewports[i].minDepth > pViewports[i].maxDepth)
3791          cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
3792       else
3793          cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
3794 
3795       if (pViewports[i].height > 0)
3796          cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
3797       else
3798          cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
3799    }
3800 
3801    if (viewportCount) {
3802       cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
3803       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3804          DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3805    }
3806 }
3807 
3808 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetScissor(VkCommandBuffer commandBuffer,uint32_t firstScissor,uint32_t scissorCount,const VkRect2D * pScissors)3809 dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
3810                   uint32_t firstScissor,
3811                   uint32_t scissorCount,
3812                   const VkRect2D *pScissors)
3813 {
3814    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3815 
3816    for (uint32_t i = 0; i < scissorCount; i++)
3817       dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
3818 
3819    if (scissorCount)
3820       cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3821 }
3822 
3823 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPushConstants(VkCommandBuffer commandBuffer,VkPipelineLayout layout,VkShaderStageFlags stageFlags,uint32_t offset,uint32_t size,const void * pValues)3824 dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
3825                      VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
3826                      const void *pValues)
3827 {
3828    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3829    struct dzn_cmd_buffer_push_constant_state *states[2];
3830    uint32_t num_states = 0;
3831 
3832    if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
3833       states[num_states++] = &cmdbuf->state.push_constant.gfx;
3834 
3835    if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
3836       states[num_states++] = &cmdbuf->state.push_constant.compute;
3837 
3838    for (uint32_t i = 0; i < num_states; i++) {
3839       memcpy(((char *)states[i]->values) + offset, pValues, size);
3840       states[i]->offset =
3841          states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
3842       states[i]->end = MAX2(states[i]->end, offset + size);
3843    }
3844 }
3845 
3846 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDraw(VkCommandBuffer commandBuffer,uint32_t vertexCount,uint32_t instanceCount,uint32_t firstVertex,uint32_t firstInstance)3847 dzn_CmdDraw(VkCommandBuffer commandBuffer,
3848             uint32_t vertexCount,
3849             uint32_t instanceCount,
3850             uint32_t firstVertex,
3851             uint32_t firstInstance)
3852 {
3853    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3854 
3855    const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
3856       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3857 
3858    cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
3859    cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
3860    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3861       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3862 
3863    if (pipeline->ia.triangle_fan) {
3864       D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
3865 
3866       VkResult result =
3867          dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
3868       if (result != VK_SUCCESS || !vertexCount)
3869          return;
3870 
3871       cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
3872       dzn_cmd_buffer_prepare_draw(cmdbuf, true);
3873       ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
3874                                             firstVertex, firstInstance);
3875 
3876       /* Restore the IB view if we modified it when lowering triangle fans. */
3877       if (ib_view.SizeInBytes > 0) {
3878          cmdbuf->state.ib.view = ib_view;
3879          cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3880       }
3881    } else {
3882       cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
3883       dzn_cmd_buffer_prepare_draw(cmdbuf, false);
3884       ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
3885                                      firstVertex, firstInstance);
3886    }
3887 }
3888 
3889 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,uint32_t indexCount,uint32_t instanceCount,uint32_t firstIndex,int32_t vertexOffset,uint32_t firstInstance)3890 dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
3891                    uint32_t indexCount,
3892                    uint32_t instanceCount,
3893                    uint32_t firstIndex,
3894                    int32_t vertexOffset,
3895                    uint32_t firstInstance)
3896 {
3897    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3898 
3899    const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
3900       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3901 
3902    if (pipeline->ia.triangle_fan &&
3903        dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
3904       /* The indexed+primitive-restart+triangle-fan combination is a mess,
3905        * since we have to walk the index buffer, skip entries with the
3906        * special 0xffff/0xffffffff values, and push triangle list indices
3907        * for the remaining values. All of this has an impact on the index
3908        * count passed to the draw call, which forces us to use the indirect
3909        * path.
3910        */
3911       struct dzn_indirect_indexed_draw_params params = {
3912          .index_count = indexCount,
3913          .instance_count = instanceCount,
3914          .first_index = firstIndex,
3915          .vertex_offset = vertexOffset,
3916          .first_instance = firstInstance,
3917       };
3918 
3919       ID3D12Resource *draw_buf;
3920       VkResult result =
3921          dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
3922                                            D3D12_HEAP_TYPE_UPLOAD,
3923                                            D3D12_RESOURCE_STATE_GENERIC_READ,
3924                                            &draw_buf);
3925       if (result != VK_SUCCESS)
3926          return;
3927 
3928       void *cpu_ptr;
3929       ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
3930       memcpy(cpu_ptr, &params, sizeof(params));
3931 
3932       ID3D12Resource_Unmap(draw_buf, 0, NULL);
3933 
3934       dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, 0, NULL, 0, 1, sizeof(params), true);
3935       return;
3936    }
3937 
3938    cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
3939    cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
3940    cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
3941    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3942       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3943 
3944    D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
3945 
3946    if (pipeline->ia.triangle_fan) {
3947       VkResult result =
3948          dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
3949       if (result != VK_SUCCESS || !indexCount)
3950          return;
3951    }
3952 
3953    dzn_cmd_buffer_prepare_draw(cmdbuf, true);
3954    ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
3955                                          vertexOffset, firstInstance);
3956 
3957    /* Restore the IB view if we modified it when lowering triangle fans. */
3958    if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
3959       cmdbuf->state.ib.view = ib_view;
3960       cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3961    }
3962 }
3963 
3964 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)3965 dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
3966                     VkBuffer buffer,
3967                     VkDeviceSize offset,
3968                     uint32_t drawCount,
3969                     uint32_t stride)
3970 {
3971    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3972    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
3973 
3974    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
3975 }
3976 
3977 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)3978 dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
3979                            VkBuffer buffer,
3980                            VkDeviceSize offset,
3981                            uint32_t drawCount,
3982                            uint32_t stride)
3983 {
3984    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3985    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
3986 
3987    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
3988 }
3989 
3990 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)3991 dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
3992                          VkBuffer buffer,
3993                          VkDeviceSize offset,
3994                          VkBuffer countBuffer,
3995                          VkDeviceSize countBufferOffset,
3996                          uint32_t maxDrawCount,
3997                          uint32_t stride)
3998 {
3999    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4000    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4001    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
4002 
4003    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
4004                                 count_buf->res, countBufferOffset,
4005                                 maxDrawCount, stride, false);
4006 }
4007 
4008 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)4009 dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
4010                                 VkBuffer buffer,
4011                                 VkDeviceSize offset,
4012                                 VkBuffer countBuffer,
4013                                 VkDeviceSize countBufferOffset,
4014                                 uint32_t maxDrawCount,
4015                                 uint32_t stride)
4016 {
4017    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4018    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4019    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
4020 
4021    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
4022                                 count_buf->res, countBufferOffset,
4023                                 maxDrawCount, stride, true);
4024 }
4025 
4026 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets)4027 dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
4028                          uint32_t firstBinding,
4029                          uint32_t bindingCount,
4030                          const VkBuffer *pBuffers,
4031                          const VkDeviceSize *pOffsets)
4032 {
4033    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4034 
4035    if (!bindingCount)
4036       return;
4037 
4038    D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
4039 
4040    for (uint32_t i = 0; i < bindingCount; i++) {
4041       VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
4042 
4043       vbviews[firstBinding + i].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + pOffsets[i];
4044       vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
4045    }
4046 
4047    BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
4048                     firstBinding + bindingCount - 1);
4049 }
4050 
4051 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)4052 dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
4053                        VkBuffer buffer,
4054                        VkDeviceSize offset,
4055                        VkIndexType indexType)
4056 {
4057    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4058    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4059 
4060    cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + offset;
4061    cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
4062    switch (indexType) {
4063    case VK_INDEX_TYPE_UINT16:
4064       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
4065       cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
4066       break;
4067    case VK_INDEX_TYPE_UINT32:
4068       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
4069       cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
4070       break;
4071    default: unreachable("Invalid index type");
4072    }
4073 
4074    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
4075 
4076    const struct dzn_graphics_pipeline *pipeline =
4077       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
4078 
4079    if (pipeline && dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
4080       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4081 }
4082 
4083 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetEvent(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags stageMask)4084 dzn_CmdResetEvent(VkCommandBuffer commandBuffer,
4085                   VkEvent event,
4086                   VkPipelineStageFlags stageMask)
4087 {
4088    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4089    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4090    VK_FROM_HANDLE(dzn_event, evt, event);
4091 
4092    if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
4093       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4094 }
4095 
4096 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetEvent(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags stageMask)4097 dzn_CmdSetEvent(VkCommandBuffer commandBuffer,
4098                 VkEvent event,
4099                 VkPipelineStageFlags stageMask)
4100 {
4101    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4102    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4103    VK_FROM_HANDLE(dzn_event, evt, event);
4104 
4105    if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
4106       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4107 }
4108 
4109 VKAPI_ATTR void VKAPI_CALL
dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,VkPipelineStageFlags srcStageMask,VkPipelineStageFlags dstStageMask,uint32_t memoryBarrierCount,const VkMemoryBarrier * pMemoryBarriers,uint32_t bufferMemoryBarrierCount,const VkBufferMemoryBarrier * pBufferMemoryBarriers,uint32_t imageMemoryBarrierCount,const VkImageMemoryBarrier * pImageMemoryBarriers)4110 dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,
4111                   uint32_t eventCount,
4112                   const VkEvent *pEvents,
4113                   VkPipelineStageFlags srcStageMask,
4114                   VkPipelineStageFlags dstStageMask,
4115                   uint32_t memoryBarrierCount,
4116                   const VkMemoryBarrier *pMemoryBarriers,
4117                   uint32_t bufferMemoryBarrierCount,
4118                   const VkBufferMemoryBarrier *pBufferMemoryBarriers,
4119                   uint32_t imageMemoryBarrierCount,
4120                   const VkImageMemoryBarrier *pImageMemoryBarriers)
4121 {
4122    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4123    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4124 
4125    /* Intra-command list wait is handle by this pipeline flush, which is
4126     * overkill, but that's the best we can do with the standard D3D12 barrier
4127     * API.
4128     *
4129     * Inter-command list is taken care of by the serialization done at the
4130     * ExecuteCommandList() level:
4131     * "Calling ExecuteCommandLists twice in succession (from the same thread,
4132     *  or different threads) guarantees that the first workload (A) finishes
4133     *  before the second workload (B)"
4134     *
4135     * HOST -> DEVICE signaling is ignored and we assume events are always
4136     * signaled when we reach the vkCmdWaitEvents() point.:
4137     * "Command buffers in the submission can include vkCmdWaitEvents commands
4138     *  that wait on events that will not be signaled by earlier commands in the
4139     *  queue. Such events must be signaled by the application using vkSetEvent,
4140     *  and the vkCmdWaitEvents commands that wait upon them must not be inside
4141     *  a render pass instance.
4142     *  The event must be set before the vkCmdWaitEvents command is executed."
4143     */
4144    bool flush_pipeline = false;
4145 
4146    for (uint32_t i = 0; i < eventCount; i++) {
4147       VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
4148 
4149       struct hash_entry *he =
4150          _mesa_hash_table_search(cmdbuf->events.ht, event);
4151       if (he) {
4152          enum dzn_event_state state = (uintptr_t)he->data;
4153          assert(state != DZN_EVENT_STATE_RESET);
4154          flush_pipeline = state == DZN_EVENT_STATE_SET;
4155       } else {
4156          if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
4157                                       (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
4158             cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4159             return;
4160          }
4161 
4162          struct dzn_event **entry =
4163             util_dynarray_grow(&cmdbuf->events.wait, struct dzn_event *, 1);
4164 
4165          if (!entry) {
4166             cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4167             return;
4168          }
4169 
4170          *entry = event;
4171       }
4172    }
4173 
4174    if (flush_pipeline) {
4175       D3D12_RESOURCE_BARRIER barrier = {
4176          .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
4177          .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
4178          .UAV = { .pResource = NULL },
4179       };
4180 
4181       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
4182    }
4183 }
4184 
4185 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)4186 dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
4187                   VkQueryPool queryPool,
4188                   uint32_t query,
4189                   VkQueryControlFlags flags)
4190 {
4191    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4192    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4193 
4194    struct dzn_cmd_buffer_query_pool_state *state =
4195       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4196    if (!state)
4197       return;
4198 
4199    qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
4200    dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
4201    ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4202 }
4203 
4204 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query)4205 dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
4206                 VkQueryPool queryPool,
4207                 uint32_t query)
4208 {
4209    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4210    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4211 
4212    struct dzn_cmd_buffer_query_pool_state *state =
4213       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4214    if (!state)
4215       return;
4216 
4217    dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
4218    ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4219 }
4220 
4221 VKAPI_ATTR void VKAPI_CALL
dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,VkPipelineStageFlags2 stage,VkQueryPool queryPool,uint32_t query)4222 dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
4223                        VkPipelineStageFlags2 stage,
4224                        VkQueryPool queryPool,
4225                        uint32_t query)
4226 {
4227    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4228    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4229 
4230    struct dzn_cmd_buffer_query_pool_state *state =
4231       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4232    if (!state)
4233       return;
4234 
4235    /* Execution barrier so the timestamp gets written after the pipeline flush. */
4236    D3D12_RESOURCE_BARRIER barrier = {
4237       .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
4238       .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
4239       .UAV = { .pResource = NULL },
4240    };
4241 
4242    ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
4243 
4244    qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP;
4245    dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
4246    ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4247 }
4248 
4249 
4250 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)4251 dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
4252                       VkQueryPool queryPool,
4253                       uint32_t firstQuery,
4254                       uint32_t queryCount)
4255 {
4256    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4257    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4258    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4259 
4260    struct dzn_cmd_buffer_query_pool_state *state =
4261       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4262 
4263    if (!state)
4264       return;
4265 
4266    uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
4267 
4268    for (uint32_t q = 0; q < queryCount; q += q_step) {
4269       uint32_t q_count = MIN2(queryCount - q, q_step);
4270 
4271       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
4272                                         dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
4273                                         device->queries.refs,
4274                                         DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
4275                                         q_count * sizeof(uint64_t));
4276    }
4277 
4278    q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
4279 
4280    for (uint32_t q = 0; q < queryCount; q += q_step) {
4281       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
4282                                         dzn_query_pool_get_result_offset(qpool, firstQuery + q),
4283                                         device->queries.refs,
4284                                         DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
4285                                         qpool->query_size);
4286    }
4287 
4288    dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
4289    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
4290 }
4291 
4292 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize stride,VkQueryResultFlags flags)4293 dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
4294                             VkQueryPool queryPool,
4295                             uint32_t firstQuery,
4296                             uint32_t queryCount,
4297                             VkBuffer dstBuffer,
4298                             VkDeviceSize dstOffset,
4299                             VkDeviceSize stride,
4300                             VkQueryResultFlags flags)
4301 {
4302    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4303    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4304    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4305 
4306    struct dzn_cmd_buffer_query_pool_state *qpstate =
4307       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4308    if (!qpstate)
4309       return;
4310 
4311    if (flags & VK_QUERY_RESULT_WAIT_BIT) {
4312       for (uint32_t i = 0; i < queryCount; i++) {
4313          if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) &&
4314              !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i))
4315             dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i);
4316       }
4317    }
4318 
4319    VkResult result =
4320       dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
4321    if (result != VK_SUCCESS)
4322       return;
4323 
4324    bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
4325                    stride == qpool->query_size &&
4326                    !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
4327 #define ALL_STATS \
4328         (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
4329          VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
4330          VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
4331          VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
4332          VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
4333          VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
4334          VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
4335          VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
4336          VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
4337          VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
4338          VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
4339    if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
4340        qpool->pipeline_statistics != ALL_STATS)
4341       raw_copy = false;
4342 #undef ALL_STATS
4343 
4344    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
4345                                             D3D12_RESOURCE_STATE_COPY_DEST,
4346                                             D3D12_RESOURCE_STATE_COPY_SOURCE,
4347                                             DZN_QUEUE_TRANSITION_FLUSH);
4348 
4349    if (raw_copy) {
4350       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
4351                                         qpool->collect_buffer,
4352                                         dzn_query_pool_get_result_offset(qpool, firstQuery),
4353                                         dzn_query_pool_get_result_size(qpool, queryCount));
4354    } else {
4355       uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);
4356 
4357       for (uint32_t q = 0; q < queryCount; q++) {
4358          uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
4359          uint32_t dst_counter_offset = 0;
4360 
4361          if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
4362             for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
4363                if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
4364                   continue;
4365 
4366                ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
4367                                                  qpool->collect_buffer,
4368                                                  res_offset + (c * sizeof(uint64_t)),
4369                                                  step);
4370                dst_counter_offset += step;
4371             }
4372          } else {
4373             ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
4374                                               qpool->collect_buffer,
4375                                               res_offset, step);
4376             dst_counter_offset += step;
4377          }
4378 
4379          if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
4380             ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
4381                                               qpool->collect_buffer,
4382                                               dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
4383                                               step);
4384          }
4385 
4386          dstOffset += stride;
4387       }
4388    }
4389 
4390    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
4391                                             D3D12_RESOURCE_STATE_COPY_SOURCE,
4392                                             D3D12_RESOURCE_STATE_COPY_DEST,
4393                                             0);
4394 }
4395 
4396 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset)4397 dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
4398                         VkBuffer buffer,
4399                         VkDeviceSize offset)
4400 {
4401    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4402    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4403    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4404 
4405    cmdbuf->state.sysvals.compute.group_count_x = 0;
4406    cmdbuf->state.sysvals.compute.group_count_y = 0;
4407    cmdbuf->state.sysvals.compute.group_count_z = 0;
4408    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
4409       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
4410 
4411    dzn_cmd_buffer_prepare_dispatch(cmdbuf);
4412 
4413    struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
4414       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
4415    ID3D12CommandSignature *cmdsig =
4416       dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);
4417 
4418    if (!cmdsig) {
4419       cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4420       return;
4421    }
4422 
4423    ID3D12Resource *exec_buf;
4424    VkResult result =
4425       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
4426                                         D3D12_HEAP_TYPE_DEFAULT,
4427                                         D3D12_RESOURCE_STATE_COPY_DEST,
4428                                         &exec_buf);
4429    if (result != VK_SUCCESS)
4430       return;
4431 
4432    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
4433                                      buf->res,
4434                                      offset,
4435                                      sizeof(D3D12_DISPATCH_ARGUMENTS));
4436    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
4437                                      buf->res,
4438                                      offset,
4439                                      sizeof(D3D12_DISPATCH_ARGUMENTS));
4440 
4441    dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
4442                                             D3D12_RESOURCE_STATE_COPY_DEST,
4443                                             D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
4444                                             DZN_QUEUE_TRANSITION_FLUSH);
4445 
4446    ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
4447 }
4448 
4449 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,float lineWidth)4450 dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
4451                     float lineWidth)
4452 {
4453    assert(lineWidth == 1.0f);
4454 }
4455 
4456 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,float depthBiasConstantFactor,float depthBiasClamp,float depthBiasSlopeFactor)4457 dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
4458                     float depthBiasConstantFactor,
4459                     float depthBiasClamp,
4460                     float depthBiasSlopeFactor)
4461 {
4462    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4463 
4464    cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
4465    cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
4466    cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
4467    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4468 }
4469 
4470 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,const float blendConstants[4])4471 dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
4472                          const float blendConstants[4])
4473 {
4474    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4475 
4476    memcpy(cmdbuf->state.blend.constants, blendConstants,
4477           sizeof(cmdbuf->state.blend.constants));
4478    cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
4479 }
4480 
4481 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,float minDepthBounds,float maxDepthBounds)4482 dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
4483                       float minDepthBounds,
4484                       float maxDepthBounds)
4485 {
4486    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4487    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4488    struct dzn_physical_device *pdev =
4489       container_of(device->vk.physical, struct dzn_physical_device, vk);
4490 
4491    if (pdev->options2.DepthBoundsTestSupported) {
4492       cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
4493       cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
4494       cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
4495    }
4496 }
4497 
4498 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t compareMask)4499 dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
4500                              VkStencilFaceFlags faceMask,
4501                              uint32_t compareMask)
4502 {
4503    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4504 
4505    if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
4506       cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
4507       cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
4508    }
4509 
4510    if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
4511       cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
4512       cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
4513    }
4514 
4515    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
4516    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4517 }
4518 
4519 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t writeMask)4520 dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
4521                            VkStencilFaceFlags faceMask,
4522                            uint32_t writeMask)
4523 {
4524    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4525 
4526    if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
4527       cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
4528       cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
4529    }
4530 
4531    if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
4532       cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
4533       cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
4534    }
4535 
4536    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
4537    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4538 }
4539 
4540 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t reference)4541 dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
4542                            VkStencilFaceFlags faceMask,
4543                            uint32_t reference)
4544 {
4545    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4546 
4547    if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
4548       cmdbuf->state.zsa.stencil_test.front.ref = reference;
4549 
4550    if (faceMask & VK_STENCIL_FACE_BACK_BIT)
4551       cmdbuf->state.zsa.stencil_test.back.ref = reference;
4552 
4553    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
4554 }
4555