• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "vk_alloc.h"
27 #include "vk_debug_report.h"
28 #include "vk_format.h"
29 #include "vk_util.h"
30 
31 #include "dxil_spirv_nir.h"
32 
static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                        D3D12_RESOURCE_BARRIER *barriers,
                                        uint32_t barrier_count)
{
   /* Record the pending transition barriers on the command list, batching
    * runs of consecutive non-NOP entries into a single ResourceBarrier()
    * call. Legacy-barrier path only (enhanced barriers use other helpers).
    */
   assert(!cmdbuf->enhanced_barriers);
   uint32_t flush_count = 0;
   for (uint32_t b = 0; b < barrier_count; b++) {
      assert(barriers[b].Transition.pResource);

      /* some layouts map to the same states, and NOP-barriers are illegal */
      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
         /* NOP entry ends the current run: flush the batch accumulated so
          * far, which started flush_count entries before this one. */
         if (flush_count) {
            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                       &barriers[b - flush_count]);
            flush_count = 0;
         }
      } else {
         flush_count++;
      }
   }

   /* Flush the trailing batch, if any. */
   if (flush_count)
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                 &barriers[barrier_count - flush_count]);

   /* Set Before = After so we don't execute the same barrier twice. */
   for (uint32_t b = 0; b < barrier_count; b++)
      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
}
63 
64 static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count)65 dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
66                                          ID3D12Resource *res,
67                                          uint32_t first_subres,
68                                          uint32_t subres_count)
69 {
70    assert(!cmdbuf->enhanced_barriers);
71    struct hash_entry *he =
72       _mesa_hash_table_search(cmdbuf->transition_barriers, res);
73    D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
74 
75    if (!barriers)
76       return;
77 
78    dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
79 }
80 
/* Flags modifying how dzn_cmd_buffer_queue_transition_barriers() behaves. */
enum dzn_queue_transition_flags {
   /* Execute the queued barriers immediately after queuing them. */
   DZN_QUEUE_TRANSITION_FLUSH = 1 << 0,
   /* The caller doesn't know the previous state (e.g. the old Vulkan layout
    * was UNDEFINED): adopt the last queued StateAfter as the before state. */
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1,
};
85 
86 static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)87 dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
88                                          ID3D12Resource *res,
89                                          uint32_t first_subres,
90                                          uint32_t subres_count,
91                                          D3D12_RESOURCE_STATES before,
92                                          D3D12_RESOURCE_STATES after,
93                                          uint32_t flags)
94 {
95    assert(!cmdbuf->enhanced_barriers);
96    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
97    struct hash_entry *he =
98       _mesa_hash_table_search(cmdbuf->transition_barriers, res);
99    struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
100 
101    if (!barriers) {
102       D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
103       D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
104       ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
105       uint32_t barrier_count =
106          fmt_info.PlaneCount *
107          desc.MipLevels * desc.DepthOrArraySize;
108 
109       barriers =
110          vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
111                    8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
112       if (!barriers)
113          return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
114 
115       he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
116       if (!he)
117          return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
118    }
119 
120    for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
121       if (!barriers[subres].Transition.pResource) {
122          barriers[subres] = (D3D12_RESOURCE_BARRIER) {
123             .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
124             .Flags = 0,
125             .Transition = {
126                .pResource = res,
127                .Subresource = subres,
128                .StateBefore = before,
129                .StateAfter = after,
130             },
131          };
132       } else {
133 	 if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
134             before = barriers[subres].Transition.StateAfter;
135 
136          assert(barriers[subres].Transition.StateAfter == before ||
137                 barriers[subres].Transition.StateAfter == after);
138          barriers[subres].Transition.StateAfter = after;
139       }
140    }
141 
142    if (flags & DZN_QUEUE_TRANSITION_FLUSH)
143       dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
144 
145    return VK_SUCCESS;
146 }
147 
148 static VkResult
dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const VkImageSubresourceRange * range,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)149 dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
150                                                   const struct dzn_image *image,
151                                                   const VkImageSubresourceRange *range,
152                                                   D3D12_RESOURCE_STATES before,
153                                                   D3D12_RESOURCE_STATES after,
154                                                   uint32_t flags)
155 {
156    assert(!cmdbuf->enhanced_barriers);
157    uint32_t first_barrier = 0, barrier_count = 0;
158    VkResult ret = VK_SUCCESS;
159 
160    dzn_foreach_aspect(aspect, range->aspectMask) {
161       uint32_t layer_count = dzn_get_layer_count(image, range);
162       uint32_t level_count = dzn_get_level_count(image, range);
163       for (uint32_t layer = 0; layer < layer_count; layer++) {
164          uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
165          if (!barrier_count) {
166             first_barrier = subres;
167             barrier_count = level_count;
168             continue;
169          } else if (first_barrier + barrier_count == subres) {
170             barrier_count += level_count;
171             continue;
172          }
173 
174          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
175                                                         first_barrier, barrier_count,
176                                                         before, after, flags);
177          if (ret != VK_SUCCESS)
178             return ret;
179 
180          barrier_count = 0;
181       }
182 
183       if (barrier_count) {
184          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
185                                                         first_barrier, barrier_count,
186                                                         before, after, flags);
187          if (ret != VK_SUCCESS)
188             return ret;
189       }
190    }
191 
192    return VK_SUCCESS;
193 }
194 
195 static VkResult
dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const VkImageSubresourceRange * range,VkImageLayout old_layout,VkImageLayout new_layout,uint32_t flags)196 dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
197                                                    const struct dzn_image *image,
198                                                    const VkImageSubresourceRange *range,
199                                                    VkImageLayout old_layout,
200                                                    VkImageLayout new_layout,
201                                                    uint32_t flags)
202 {
203    assert(!cmdbuf->enhanced_barriers);
204    uint32_t first_barrier = 0, barrier_count = 0;
205    VkResult ret = VK_SUCCESS;
206 
207    if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
208       flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;
209 
210    dzn_foreach_aspect(aspect, range->aspectMask) {
211       D3D12_RESOURCE_STATES after =
212          dzn_image_layout_to_state(image, new_layout, aspect, cmdbuf->type);
213       D3D12_RESOURCE_STATES before =
214          (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
215           old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
216          D3D12_RESOURCE_STATE_COMMON :
217          dzn_image_layout_to_state(image, old_layout, aspect, cmdbuf->type);
218 
219       uint32_t layer_count = dzn_get_layer_count(image, range);
220       uint32_t level_count = dzn_get_level_count(image, range);
221       for (uint32_t layer = 0; layer < layer_count; layer++) {
222          uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
223          if (!barrier_count) {
224             first_barrier = subres;
225             barrier_count = level_count;
226             continue;
227          } else if (first_barrier + barrier_count == subres) {
228             barrier_count += level_count;
229             continue;
230          }
231 
232          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
233                                                         first_barrier, barrier_count,
234                                                         before, after, flags);
235          if (ret != VK_SUCCESS)
236             return ret;
237 
238          barrier_count = 0;
239       }
240 
241       if (barrier_count) {
242          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
243                                                         first_barrier, barrier_count,
244                                                         before, after, flags);
245          if (ret != VK_SUCCESS)
246             return ret;
247       }
248    }
249 
250    return VK_SUCCESS;
251 }
252 
253 static void
dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer * cmdbuf,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after)254 dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer *cmdbuf,
255                               D3D12_BARRIER_SYNC sync_before,
256                               D3D12_BARRIER_SYNC sync_after,
257                               D3D12_BARRIER_ACCESS access_before,
258                               D3D12_BARRIER_ACCESS access_after)
259 {
260    assert(cmdbuf->enhanced_barriers);
261    D3D12_GLOBAL_BARRIER global = {
262       .SyncBefore = sync_before,
263       .SyncAfter = sync_after,
264       .AccessBefore = access_before,
265       .AccessAfter = access_after,
266    };
267    D3D12_BARRIER_GROUP group = {
268       .Type = D3D12_BARRIER_TYPE_GLOBAL,
269       .NumBarriers = 1,
270       .pGlobalBarriers = &global,
271    };
272    ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
273 }
274 
275 static void
dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * buf,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after)276 dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer *cmdbuf,
277                               ID3D12Resource *buf,
278                               D3D12_BARRIER_SYNC sync_before,
279                               D3D12_BARRIER_SYNC sync_after,
280                               D3D12_BARRIER_ACCESS access_before,
281                               D3D12_BARRIER_ACCESS access_after)
282 {
283    assert(cmdbuf->enhanced_barriers);
284    D3D12_BUFFER_BARRIER buffer = {
285       .SyncBefore = sync_before,
286       .SyncAfter = sync_after,
287       .AccessBefore = access_before,
288       .AccessAfter = access_after,
289       .pResource = buf,
290       .Offset = 0,
291       .Size = UINT64_MAX,
292    };
293    D3D12_BARRIER_GROUP group = {
294       .Type = D3D12_BARRIER_TYPE_BUFFER,
295       .NumBarriers = 1,
296       .pBufferBarriers = &buffer,
297    };
298    ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
299 }
300 
301 static void
dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after,D3D12_BARRIER_LAYOUT layout_before,D3D12_BARRIER_LAYOUT layout_after,const VkImageSubresourceRange * range)302 dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer *cmdbuf,
303                              const struct dzn_image *image,
304                              D3D12_BARRIER_SYNC sync_before,
305                              D3D12_BARRIER_SYNC sync_after,
306                              D3D12_BARRIER_ACCESS access_before,
307                              D3D12_BARRIER_ACCESS access_after,
308                              D3D12_BARRIER_LAYOUT layout_before,
309                              D3D12_BARRIER_LAYOUT layout_after,
310                              const VkImageSubresourceRange *range)
311 {
312    assert(cmdbuf->enhanced_barriers);
313    uint32_t first_plane = (range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
314    uint32_t plane_count = first_plane == 0 && (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 2 : 1;
315    D3D12_TEXTURE_BARRIER texture = {
316       .SyncBefore = sync_before,
317       .SyncAfter = sync_after,
318       .AccessBefore = access_before,
319       .AccessAfter = access_after,
320       .LayoutBefore = layout_before,
321       .LayoutAfter = layout_after,
322       .Subresources.FirstArraySlice = range->baseArrayLayer,
323       .Subresources.NumArraySlices = dzn_get_layer_count(image, range),
324       .Subresources.IndexOrFirstMipLevel = range->baseMipLevel,
325       .Subresources.NumMipLevels = dzn_get_level_count(image, range),
326       .Subresources.FirstPlane = first_plane,
327       .Subresources.NumPlanes = plane_count,
328       .pResource = image->res,
329    };
330    D3D12_BARRIER_GROUP group = {
331       .Type = D3D12_BARRIER_TYPE_TEXTURE,
332       .NumBarriers = 1,
333       .pTextureBarriers = &texture,
334    };
335    ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
336 }
337 
338 static D3D12_BARRIER_LAYOUT
dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout current_layout,D3D12_BARRIER_LAYOUT needed_layout,const VkImageSubresourceRange * range)339 dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer *cmdbuf,
340                               const struct dzn_image *image,
341                               VkImageLayout current_layout,
342                               D3D12_BARRIER_LAYOUT needed_layout,
343                               const VkImageSubresourceRange *range)
344 {
345    assert(cmdbuf->enhanced_barriers);
346    /* We shouldn't need these fixups on a subresource range which includes depth and stencil,
347       where one is read-only and the other is writable */
348    if (range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
349       assert(current_layout != VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL &&
350              current_layout != VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL);
351    }
352 
353    /* Nothing needs to be done for these, the appropriate sync/access was already handled */
354    if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
355       return needed_layout;
356 
357    D3D12_BARRIER_LAYOUT current_d3d_layout = dzn_vk_layout_to_d3d_layout(current_layout, cmdbuf->type, range->aspectMask);
358    if (current_d3d_layout != needed_layout) {
359       dzn_cmd_buffer_image_barrier(cmdbuf, image,
360                                    D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
361                                    D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON,
362                                    current_d3d_layout, needed_layout, range);
363    }
364    return current_d3d_layout;
365 }
366 
367 static void
dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,D3D12_BARRIER_SYNC sync,D3D12_BARRIER_ACCESS access,D3D12_BARRIER_LAYOUT needed_layout,D3D12_BARRIER_LAYOUT restore_layout,const VkImageSubresourceRange * range)368 dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer *cmdbuf,
369                               const struct dzn_image *image,
370                               D3D12_BARRIER_SYNC sync,
371                               D3D12_BARRIER_ACCESS access,
372                               D3D12_BARRIER_LAYOUT needed_layout,
373                               D3D12_BARRIER_LAYOUT restore_layout,
374                               const VkImageSubresourceRange *range)
375 {
376    if (needed_layout != restore_layout) {
377       dzn_cmd_buffer_image_barrier(cmdbuf, image,
378                                    sync, D3D12_BARRIER_SYNC_COPY,
379                                    access, D3D12_BARRIER_ACCESS_COMMON,
380                                    needed_layout, restore_layout, range);
381    }
382 }
383 
static void
dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
{
   /* Tear down a command buffer: release all D3D12 objects, internal
    * buffers, descriptor-heap pools and cached hash tables, then free the
    * command buffer itself. Every member is NULL-checked first, so this is
    * safe to call on a partially-initialized command buffer (used as the
    * error path of dzn_cmd_buffer_create()).
    */
   if (!cbuf)
      return;

   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   if (cmdbuf->cmdlist)
      ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);

   if (cmdbuf->cmdlist8)
      ID3D12GraphicsCommandList8_Release(cmdbuf->cmdlist8);

   if (cmdbuf->cmdlist9)
      ID3D12GraphicsCommandList9_Release(cmdbuf->cmdlist9);

   if (cmdbuf->cmdalloc)
      ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);

   /* Release the internal buffers accumulated while recording. */
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
      list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
         list_del(&res->link);
         ID3D12Resource_Release(res->res);
         vk_free(&cbuf->pool->alloc, res);
      }
   }

   dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
   util_dynarray_fini(&cmdbuf->events.signal);
   util_dynarray_fini(&cmdbuf->queries.reset);
   util_dynarray_fini(&cmdbuf->queries.signal);

   /* The rtv/dsv/transition tables own their values (vk_alloc'd); free
    * each entry before destroying the table. */
   if (cmdbuf->rtvs.ht) {
      hash_table_foreach(cmdbuf->rtvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
   }

   if (cmdbuf->dsvs.ht) {
      hash_table_foreach(cmdbuf->dsvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
   }

   /* events.ht values are not owned by the table, nothing to free. */
   if (cmdbuf->events.ht)
      _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);

   /* Query-pool state entries also own dynarrays; finish those first. */
   if (cmdbuf->queries.ht) {
      hash_table_foreach(cmdbuf->queries.ht, he) {
         struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
         util_dynarray_fini(&qpstate->reset);
         util_dynarray_fini(&qpstate->collect);
         util_dynarray_fini(&qpstate->signal);
         util_dynarray_fini(&qpstate->zero);
         vk_free(&cbuf->pool->alloc, he->data);
      }
      _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
   }

   if (cmdbuf->transition_barriers) {
      hash_table_foreach(cmdbuf->transition_barriers, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
   }

   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&cbuf->pool->alloc, cmdbuf);
}
456 
static void
dzn_cmd_buffer_reset(struct vk_command_buffer *cbuf, VkCommandBufferResetFlags flags)
{
   /* vk_command_buffer_ops.reset: return the command buffer to its initial
    * state, dropping all recorded state, internal buffers, cached
    * descriptors and queued barriers, and resetting the D3D12 allocator.
    */
   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   /* Reset the state */
   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   cmdbuf->state.multiview.num_views = 1;
   cmdbuf->state.multiview.view_mask = 1;

   /* TODO: Return resources to the pool */
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
      list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
         list_del(&res->link);
         ID3D12Resource_Release(res->res);
         vk_free(&cmdbuf->vk.pool->alloc, res);
      }
   }
   cmdbuf->cur_upload_buf = NULL;

   util_dynarray_clear(&cmdbuf->events.signal);
   util_dynarray_clear(&cmdbuf->queries.reset);
   util_dynarray_clear(&cmdbuf->queries.signal);
   /* The rtv/dsv/transition tables own their values; free each entry
    * before clearing the table. */
   hash_table_foreach(cmdbuf->rtvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
   cmdbuf->null_rtv.ptr = 0;
   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
   hash_table_foreach(cmdbuf->dsvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
   /* Query-pool state entries also own dynarrays; finish those first. */
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
      util_dynarray_fini(&qpstate->reset);
      util_dynarray_fini(&qpstate->collect);
      util_dynarray_fini(&qpstate->signal);
      util_dynarray_fini(&qpstate->zero);
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   }
   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
   hash_table_foreach(cmdbuf->transition_barriers, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);

   /* An open command list must be closed before its allocator can be
    * reset. Secondary command buffers don't own a list here. */
   if (cmdbuf->vk.state == MESA_VK_COMMAND_BUFFER_STATE_RECORDING &&
       cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);

   vk_command_buffer_reset(&cmdbuf->vk);

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
}
514 
515 static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void * key)516 dzn_cmd_buffer_rtv_key_hash_function(const void *key)
517 {
518    return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
519 }
520 
521 static bool
dzn_cmd_buffer_rtv_key_equals_function(const void * a,const void * b)522 dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
523 {
524    return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
525 }
526 
527 static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void * key)528 dzn_cmd_buffer_dsv_key_hash_function(const void *key)
529 {
530    return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
531 }
532 
533 static bool
dzn_cmd_buffer_dsv_key_equals_function(const void * a,const void * b)534 dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
535 {
536    return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
537 }
538 
/* vk_command_buffer dispatch table: how the common Vulkan runtime destroys
 * and resets our command buffers. */
static const struct vk_command_buffer_ops cmd_buffer_ops = {
   .destroy = dzn_cmd_buffer_destroy,
   .reset = dzn_cmd_buffer_reset,
};
543 
/* Per-command-list-type mask of the D3D12_BARRIER_SYNC bits that may
 * legally appear on that list type. Indexed by D3D12_COMMAND_LIST_TYPE;
 * used to clamp the sync scopes we derive from Vulkan stage masks. */
static const D3D12_BARRIER_SYNC cmd_buffer_valid_sync[] = {
   /* Direct (graphics) lists accept everything except video-queue sync. */
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_SYNC_VIDEO_DECODE |
                                        D3D12_BARRIER_SYNC_VIDEO_PROCESS |
                                        D3D12_BARRIER_SYNC_VIDEO_ENCODE),
   /* Compute lists: compute/copy/raytracing-related sync only. */
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_SYNC_ALL |
                                        D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                                        D3D12_BARRIER_SYNC_RAYTRACING |
                                        D3D12_BARRIER_SYNC_COPY |
                                        D3D12_BARRIER_SYNC_EXECUTE_INDIRECT |
                                        D3D12_BARRIER_SYNC_PREDICATION |
                                        D3D12_BARRIER_SYNC_ALL_SHADING |
                                        D3D12_BARRIER_SYNC_NON_PIXEL_SHADING |
                                        D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
                                        D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW |
                                        D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
                                        D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE),
   /* Copy lists: copy sync only. */
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_SYNC_ALL | D3D12_BARRIER_SYNC_COPY
};
/* Per-command-list-type mask of the D3D12_BARRIER_ACCESS bits that may
 * legally appear on that list type. Indexed by D3D12_COMMAND_LIST_TYPE;
 * used to clamp the access scopes we derive from Vulkan access masks. */
static const D3D12_BARRIER_ACCESS cmd_buffer_valid_access[] = {
   /* Direct (graphics) lists accept everything except video-queue access. */
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE),
   /* Compute lists: shader/copy/raytracing-related access only. */
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
                                        D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
                                        D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
                                        D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
                                        D3D12_BARRIER_ACCESS_PREDICATION |
                                        D3D12_BARRIER_ACCESS_COPY_DEST |
                                        D3D12_BARRIER_ACCESS_COPY_SOURCE |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE),
   /* Copy lists: copy access only. */
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_ACCESS_COPY_SOURCE | D3D12_BARRIER_ACCESS_COPY_DEST,
};
580 
static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
                      VkCommandBuffer *out)
{
   /* Allocate and initialize one dzn_cmd_buffer for the pool named in
    * info: base vk_command_buffer, internal-buffer lists, dynarrays,
    * descriptor-heap pools, lookup tables, and (for primary command
    * buffers) the D3D12 command allocator + command list. On failure,
    * dzn_cmd_buffer_destroy() unwinds whatever was initialized and
    * VK_ERROR_OUT_OF_HOST_MEMORY (or the init error) is returned.
    */
   VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
   struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   assert(pool->queue_family_index < pdev->queue_family_count);

   /* The D3D12 list type is dictated by the queue family the pool serves. */
   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;

   struct dzn_cmd_buffer *cmdbuf =
      vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      vk_command_buffer_init(pool, &cmdbuf->vk, &cmd_buffer_ops, info->level);
   if (result != VK_SUCCESS) {
      /* Base init failed: cmdbuf isn't a valid vk_command_buffer yet, so
       * free it directly rather than going through destroy(). */
      vk_free(&pool->alloc, cmdbuf);
      return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   cmdbuf->state.multiview.num_views = 1;
   cmdbuf->state.multiview.view_mask = 1;
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket)
      list_inithead(&cmdbuf->internal_bufs[bucket]);
   util_dynarray_init(&cmdbuf->events.signal, NULL);
   util_dynarray_init(&cmdbuf->queries.reset, NULL);
   util_dynarray_init(&cmdbuf->queries.signal, NULL);
   dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                 true, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                 true, &pool->alloc);

   cmdbuf->events.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->queries.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->transition_barriers =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->rtvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_rtv_key_hash_function,
                              dzn_cmd_buffer_rtv_key_equals_function);
   cmdbuf->dsvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_dsv_key_hash_function,
                              dzn_cmd_buffer_dsv_key_equals_function);
   if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
       !cmdbuf->transition_barriers ||
       !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   /* Only primary command buffers own D3D12 objects; secondaries are
    * replayed into a primary's list. */
   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
                                                      &IID_ID3D12CommandAllocator,
                                                      (void **)&cmdbuf->cmdalloc))) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      if (FAILED(ID3D12Device4_CreateCommandList1(device->dev, 0, type,
                                                  D3D12_COMMAND_LIST_FLAG_NONE,
                                                  &IID_ID3D12GraphicsCommandList1,
                                                  (void **)&cmdbuf->cmdlist))) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      /* Optional newer interfaces; failures leave the pointers NULL and
       * are tolerated (features depending on them check for NULL). */
      (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList8, (void **)&cmdbuf->cmdlist8);
      (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList9, (void **)&cmdbuf->cmdlist9);
   }

   cmdbuf->type = type;
   cmdbuf->valid_sync = cmd_buffer_valid_sync[type];
   cmdbuf->valid_access = cmd_buffer_valid_access[type];
   cmdbuf->enhanced_barriers = pdev->options12.EnhancedBarriersSupported;

out:
   /* destroy() handles partially-initialized command buffers. */
   if (result != VK_SUCCESS)
      dzn_cmd_buffer_destroy(&cmdbuf->vk);
   else
      *out = dzn_cmd_buffer_to_handle(cmdbuf);

   return result;
}
682 
683 VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,const VkCommandBufferAllocateInfo * pAllocateInfo,VkCommandBuffer * pCommandBuffers)684 dzn_AllocateCommandBuffers(VkDevice device,
685                            const VkCommandBufferAllocateInfo *pAllocateInfo,
686                            VkCommandBuffer *pCommandBuffers)
687 {
688    VK_FROM_HANDLE(dzn_device, dev, device);
689    VkResult result = VK_SUCCESS;
690    uint32_t i;
691 
692    for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
693       result = dzn_cmd_buffer_create(pAllocateInfo,
694                                      &pCommandBuffers[i]);
695       if (result != VK_SUCCESS)
696          break;
697    }
698 
699    if (result != VK_SUCCESS) {
700       dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
701                                                 i, pCommandBuffers);
702       for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
703          pCommandBuffers[i] = VK_NULL_HANDLE;
704    }
705 
706    return result;
707 }
708 
709 VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,const VkCommandBufferBeginInfo * info)710 dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
711                        const VkCommandBufferBeginInfo *info)
712 {
713    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
714    vk_command_buffer_begin(&cmdbuf->vk, info);
715    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
716       ID3D12GraphicsCommandList1_Reset(cmdbuf->cmdlist, cmdbuf->cmdalloc, NULL);
717    return vk_command_buffer_get_record_result(&cmdbuf->vk);
718 }
719 
720 static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer * cmdbuf)721 dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
722 {
723    if (vk_command_buffer_has_error(&cmdbuf->vk))
724       goto out;
725 
726    hash_table_foreach(cmdbuf->events.ht, he) {
727       enum dzn_event_state state = (uintptr_t)he->data;
728 
729       struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
730       struct dzn_cmd_event_signal *entry =
731          util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);
732 
733       if (!entry) {
734          vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
735          break;
736       }
737 
738       *entry = signal;
739    }
740 
741 out:
742    _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
743 }
744 
745 static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)746 dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
747 {
748 
749    if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
750       return VK_SUCCESS;
751 
752    unsigned old_sz = array->size;
753    void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
754    if (!ptr)
755       return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
756 
757    memset(ptr, 0, array->size - old_sz);
758    return VK_SUCCESS;
759 }
760 
761 static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray * array,uint32_t bit)762 dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
763 {
764    uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
765 
766    if (bit < nbits)
767       return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);
768 
769    return false;
770 }
771 
772 static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)773 dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
774 {
775    VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
776    if (result != VK_SUCCESS)
777       return result;
778 
779    BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
780    return VK_SUCCESS;
781 }
782 
783 static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)784 dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
785 {
786    if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
787       return;
788 
789    BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
790 }
791 
792 static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)793 dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
794                                    struct util_dynarray *array,
795                                    uint32_t bit, uint32_t count)
796 {
797    VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
798    if (result != VK_SUCCESS)
799       return result;
800 
801    BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
802    return VK_SUCCESS;
803 }
804 
805 static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)806 dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
807                                      struct util_dynarray *array,
808                                      uint32_t bit, uint32_t count)
809 {
810    uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
811 
812    if (!nbits)
813       return;
814 
815    uint32_t end = MIN2(bit + count, nbits) - 1;
816 
817    while (bit <= end) {
818       uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
819       BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
820       bit += subcount;
821    }
822 }
823 
824 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer * cmdbuf)825 dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
826 {
827    struct dzn_cmd_buffer_query_pool_state *state =
828       vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
829                8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
830    if (!state) {
831       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
832       return NULL;
833    }
834 
835    util_dynarray_init(&state->reset, NULL);
836    util_dynarray_init(&state->collect, NULL);
837    util_dynarray_init(&state->signal, NULL);
838    util_dynarray_init(&state->zero, NULL);
839    return state;
840 }
841 
842 static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_cmd_buffer_query_pool_state * state)843 dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
844                                         struct dzn_cmd_buffer_query_pool_state *state)
845 {
846    util_dynarray_fini(&state->reset);
847    util_dynarray_fini(&state->collect);
848    util_dynarray_fini(&state->signal);
849    util_dynarray_fini(&state->zero);
850    vk_free(&cmdbuf->vk.pool->alloc, state);
851 }
852 
853 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool)854 dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
855                                     struct dzn_query_pool *qpool)
856 {
857    struct dzn_cmd_buffer_query_pool_state *state = NULL;
858    struct hash_entry *he =
859       _mesa_hash_table_search(cmdbuf->queries.ht, qpool);
860 
861    if (!he) {
862       state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
863       if (!state)
864          return NULL;
865 
866       he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
867       if (!he) {
868          dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
869          vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
870          return NULL;
871       }
872    } else {
873       state = he->data;
874    }
875 
876    return state;
877 }
878 
/* Resolve pending query results for @qpool into its collect_buffer and mark
 * them available.
 *
 * Two per-pool bitsets drive the work:
 *  - state->collect: queries whose results must be resolved from the D3D12
 *    query heap into resolve_buffer, then copied into collect_buffer;
 *  - state->zero: query result slots that must be zeroed instead (used for
 *    multi-view rendering, views other than the first).
 * Queries handled here are moved into state->signal and cleared from their
 * source bitset.
 */
static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
                               const struct dzn_query_pool *qpool,
                               struct dzn_cmd_buffer_query_pool_state *state,
                               uint32_t first_query,
                               uint32_t query_count)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t nbits_collect = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t nbits_zero = util_dynarray_num_elements(&state->zero, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   /* Nothing tracked for this pool: no work to do. */
   if (!nbits_collect && !nbits_zero)
      return VK_SUCCESS;

   /* Clamp the requested range to the bits actually stored in the bitsets. */
   query_count = MIN2(query_count, MAX2(nbits_collect, nbits_zero) - first_query);
   nbits_collect = MIN2(first_query + query_count, nbits_collect);
   nbits_zero = MIN2(first_query + query_count, nbits_zero);

   /* Reserve the signal bitset up front so the set_range calls further down
    * cannot fail halfway through GPU command emission. */
   VkResult result =
      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
   if (result != VK_SUCCESS)
      return result;

   if (cmdbuf->enhanced_barriers) {
      /* A global barrier is used because both resolve_buffer and collect_buffer might have been
       * copied from recently, and it's not worth the effort to track whether that's true. */
      dzn_cmd_buffer_global_barrier(cmdbuf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
   } else {
      dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);
   }

   /* Resolve the valid query regions into the resolve buffer */
   BITSET_WORD *collect =
      util_dynarray_element(&state->collect, BITSET_WORD, 0);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits_collect);
        start < nbits_collect;
        __bitset_next_range(&start, &end, collect, nbits_collect)) {
      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
                                                  qpool->heap,
                                                  qpool->queries[start].type,
                                                  start, end - start,
                                                  qpool->resolve_buffer,
                                                  qpool->query_size * start);
   }

   /* Zero out sections of the resolve buffer that contain queries for multi-view rendering
    * for views other than the first one. */
   BITSET_WORD *zero =
      util_dynarray_element(&state->zero, BITSET_WORD, 0);
   const uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, zero, nbits_zero);
        start < nbits_zero;
        __bitset_next_range(&start, &end, zero, nbits_zero)) {
      uint32_t count = end - start;

      /* The all-zeros reference data is only DZN_QUERY_REFS_SECTION_SIZE
       * bytes, so large ranges are covered by repeated copies. */
      for (unsigned i = 0; i < count; i += step) {
         uint32_t sub_count = MIN2(step, count - i);

         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                     qpool->resolve_buffer,
                                                     dzn_query_pool_get_result_offset(qpool, start + i),
                                                     device->queries.refs,
                                                     DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
                                                     qpool->query_size * sub_count);
      }
   }

   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);

   /* resolve_buffer was written above; flip it to a copy source before
    * copying the results over to collect_buffer. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf,
                                    qpool->resolve_buffer,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                               qpool->collect_buffer, offset,
                                               qpool->resolve_buffer, offset,
                                               size);

   /* For both the collected and zeroed ranges: write the all-ones
    * availability marker into collect_buffer, move the queries into the
    * signal bitset, and drop them from their source bitset. */
   struct query_pass_data {
      struct util_dynarray *dynarray;
      BITSET_WORD *bitset;
      uint32_t count;
   } passes[] = {
      { &state->collect, collect, nbits_collect },
      { &state->zero, zero, nbits_zero }
   };
   for (uint32_t pass = 0; pass < ARRAY_SIZE(passes); ++pass) {
      BITSET_WORD *bitset = passes[pass].bitset;
      uint32_t nbits = passes[pass].count;
      for (start = first_query, end = first_query,
           __bitset_next_range(&start, &end, bitset, nbits);
           start < nbits;
           __bitset_next_range(&start, &end, bitset, nbits)) {
         uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
         uint32_t count = end - start;

         for (unsigned i = 0; i < count; i += step) {
            uint32_t sub_count = MIN2(step, count - i);

            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, start + i),
                                                        device->queries.refs,
                                                        DZN_QUERY_REFS_ALL_ONES_OFFSET,
                                                        sizeof(uint64_t) * sub_count);
         }

         dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
         dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, passes[pass].dynarray, start, count);
      }
   }

   /* Legacy barriers only: queue (don't flush) the transition back to
    * COPY_DEST so resolve_buffer is ready for the next collection. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
   return VK_SUCCESS;
}
1017 
1018 static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool,struct util_dynarray * bitset_array,struct util_dynarray * ops_array)1019 dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
1020                                  struct dzn_query_pool *qpool,
1021                                  struct util_dynarray *bitset_array,
1022                                  struct util_dynarray *ops_array)
1023 {
1024    BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
1025    uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
1026    uint32_t start, end;
1027 
1028    BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
1029       struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
1030       struct dzn_cmd_buffer_query_range *entry =
1031          util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);
1032 
1033       if (!entry)
1034          return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1035 
1036       *entry = range;
1037    }
1038 
1039    return VK_SUCCESS;
1040 }
1041 
1042 static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer * cmdbuf)1043 dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
1044 {
1045    hash_table_foreach(cmdbuf->queries.ht, he) {
1046       struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
1047       struct dzn_cmd_buffer_query_pool_state *state = he->data;
1048       VkResult result =
1049          dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
1050       if (result != VK_SUCCESS)
1051          return result;
1052 
1053       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
1054       if (result != VK_SUCCESS)
1055          return result;
1056 
1057       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
1058       if (result != VK_SUCCESS)
1059          return result;
1060    }
1061 
1062    return VK_SUCCESS;
1063 }
1064 
1065 VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)1066 dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
1067 {
1068    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
1069 
1070    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1071       dzn_cmd_buffer_gather_events(cmdbuf);
1072       dzn_cmd_buffer_gather_queries(cmdbuf);
1073       HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
1074       if (FAILED(hres))
1075          vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1076    }
1077 
1078    return vk_command_buffer_end(&cmdbuf->vk);
1079 }
1080 
1081 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,const VkDependencyInfo * info)1082 dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
1083                         const VkDependencyInfo *info)
1084 {
1085    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
1086 
1087    bool execution_barrier =
1088       !info->memoryBarrierCount &&
1089       !info->bufferMemoryBarrierCount &&
1090       !info->imageMemoryBarrierCount;
1091 
1092    if (execution_barrier) {
1093       /* Execution barrier can be emulated with a NULL UAV barrier (AKA
1094        * pipeline flush). That's the best we can do with the standard D3D12
1095        * barrier API.
1096        */
1097       D3D12_RESOURCE_BARRIER barrier = {
1098          .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
1099          .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
1100          .UAV = { .pResource = NULL },
1101       };
1102 
1103       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
1104    }
1105 
1106    /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
1107     * Scopes are not taken into account, but that's inherent to the current
1108     * D3D12 barrier API.
1109     */
1110    if (info->memoryBarrierCount) {
1111       D3D12_RESOURCE_BARRIER barriers[2] = { 0 };
1112 
1113       barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
1114       barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
1115       barriers[0].UAV.pResource = NULL;
1116       barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
1117       barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
1118       barriers[1].Aliasing.pResourceBefore = NULL;
1119       barriers[1].Aliasing.pResourceAfter = NULL;
1120       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
1121    }
1122 
1123    for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
1124       VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
1125       D3D12_RESOURCE_BARRIER barrier = { 0 };
1126 
1127       /* UAV are used only for storage buffers, skip all other buffers. */
1128       if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
1129          continue;
1130 
1131       barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
1132       barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
1133       barrier.UAV.pResource = buf->res;
1134       ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
1135    }
1136 
1137    for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
1138       const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
1139       const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
1140       VK_FROM_HANDLE(dzn_image, image, ibarrier->image);
1141 
1142       VkImageLayout old_layout = ibarrier->oldLayout;
1143       VkImageLayout new_layout = ibarrier->newLayout;
1144       if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
1145           old_layout == VK_IMAGE_LAYOUT_GENERAL &&
1146           (ibarrier->srcAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
1147          old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
1148       if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
1149           new_layout == VK_IMAGE_LAYOUT_GENERAL &&
1150           (ibarrier->dstAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
1151          new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
1152       dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1153                                                          old_layout,
1154                                                          new_layout,
1155                                                          DZN_QUEUE_TRANSITION_FLUSH);
1156    }
1157 }
1158 
1159 /* A straightforward translation of the Vulkan sync flags to D3D sync flags */
1160 static D3D12_BARRIER_SYNC
translate_sync(VkPipelineStageFlags2 flags,bool before)1161 translate_sync(VkPipelineStageFlags2 flags, bool before)
1162 {
1163    if (!before && (flags & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT))
1164       return D3D12_BARRIER_SYNC_ALL;
1165    else if (before && (flags & VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
1166       return D3D12_BARRIER_SYNC_ALL;
1167 
1168    if (flags & (VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
1169                 /* Theoretically transfer should be less, but it encompasses blit
1170                  * (which can be draws) and clears, so bloat it up to everything. */
1171                 VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
1172                 VK_PIPELINE_STAGE_2_BLIT_BIT))
1173       return D3D12_BARRIER_SYNC_ALL;
1174 
1175    D3D12_BARRIER_SYNC ret = D3D12_BARRIER_SYNC_NONE;
1176    if (flags & (VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
1177                 VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
1178                 VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT))
1179       ret |= D3D12_BARRIER_SYNC_INDEX_INPUT;
1180    if (flags & VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT)
1181       ret |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
1182    if (flags & (VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
1183                 VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
1184                 VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
1185                 VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
1186                 VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
1187                 VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT |
1188                 VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT))
1189       ret |= D3D12_BARRIER_SYNC_NON_PIXEL_SHADING;
1190    if (flags & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
1191                 VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR))
1192       ret |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
1193    if (flags & (VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
1194                 VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT))
1195       ret |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
1196    if (flags & VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT)
1197       ret |= D3D12_BARRIER_SYNC_RENDER_TARGET;
1198    if (flags & VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)
1199       ret |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
1200    if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT)
1201       ret |= D3D12_BARRIER_SYNC_DRAW;
1202    if (flags & VK_PIPELINE_STAGE_2_COPY_BIT)
1203       ret |= D3D12_BARRIER_SYNC_COPY;
1204    if (flags & VK_PIPELINE_STAGE_2_RESOLVE_BIT)
1205       ret |= D3D12_BARRIER_SYNC_RESOLVE;
1206    if (flags & VK_PIPELINE_STAGE_2_CLEAR_BIT)
1207       ret |= D3D12_BARRIER_SYNC_RENDER_TARGET |
1208              D3D12_BARRIER_SYNC_DEPTH_STENCIL |
1209              D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
1210    if (flags & VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
1211       ret |= D3D12_BARRIER_SYNC_PREDICATION;
1212    if (flags & (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
1213                 VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV))
1214       ret |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
1215    if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)
1216       ret |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
1217    if (flags & VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR)
1218       ret |= D3D12_BARRIER_SYNC_RAYTRACING;
1219    if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR)
1220       ret |= D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE;
1221 
1222    return ret;
1223 }
1224 
1225 /* A straightforward translation of Vulkan access to D3D access */
1226 static D3D12_BARRIER_ACCESS
translate_access(VkAccessFlags2 flags)1227 translate_access(VkAccessFlags2 flags)
1228 {
1229    D3D12_BARRIER_ACCESS ret = D3D12_BARRIER_ACCESS_COMMON;
1230    if (flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT)
1231       ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
1232    if (flags & VK_ACCESS_2_INDEX_READ_BIT)
1233       ret |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;
1234    if (flags & VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT)
1235       ret |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;
1236    if (flags & VK_ACCESS_2_UNIFORM_READ_BIT)
1237       ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;
1238    if (flags & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
1239                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
1240       ret |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1241    if (flags & VK_ACCESS_2_SHADER_READ_BIT)
1242       ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
1243              D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1244              D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1245    if (flags & (VK_ACCESS_2_SHADER_WRITE_BIT |
1246                 VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1247                 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT))
1248       ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1249    if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT)
1250       ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1251              D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1252    if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT)
1253       ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1254              D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1255    if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT)
1256       ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1257    if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)
1258       ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1259    if (flags & VK_ACCESS_2_TRANSFER_READ_BIT)
1260       ret |= D3D12_BARRIER_ACCESS_COPY_SOURCE |
1261              D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1262    if (flags & VK_ACCESS_2_TRANSFER_WRITE_BIT)
1263       ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1264              D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
1265              D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1266              D3D12_BARRIER_ACCESS_COPY_DEST |
1267              D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1268    if (flags & VK_ACCESS_2_MEMORY_READ_BIT)
1269       ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
1270              D3D12_BARRIER_ACCESS_INDEX_BUFFER |
1271              D3D12_BARRIER_ACCESS_VERTEX_BUFFER |
1272              D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
1273              D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1274              D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1275              D3D12_BARRIER_ACCESS_RENDER_TARGET |
1276              D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1277              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1278              D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1279    if (flags & VK_ACCESS_2_MEMORY_WRITE_BIT)
1280       ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1281              D3D12_BARRIER_ACCESS_RENDER_TARGET |
1282              D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
1283              D3D12_BARRIER_ACCESS_COPY_DEST |
1284              D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1285    if (flags & (VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
1286                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
1287                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT))
1288       ret |= D3D12_BARRIER_ACCESS_STREAM_OUTPUT;
1289    if (flags & VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT)
1290       ret |= D3D12_BARRIER_ACCESS_PREDICATION;
1291    if (flags & VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)
1292       ret |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1293    if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR)
1294       ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
1295    if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)
1296       ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
1297    return ret;
1298 }
1299 
1300 /* For texture barriers, D3D will validate that the access flags used are actually
1301  * things that were valid for the specified layout. Use the mask returned from here
1302  * to scope down the set of app-provided access flags to make validation happy. */
1303 static D3D12_BARRIER_ACCESS
valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)1304 valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)
1305 {
1306    switch (layout) {
1307    case D3D12_BARRIER_LAYOUT_UNDEFINED:
1308       return D3D12_BARRIER_ACCESS_NO_ACCESS;
1309    case D3D12_BARRIER_LAYOUT_COMMON:
1310       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1311              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1312              D3D12_BARRIER_ACCESS_COPY_DEST;
1313    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON:
1314    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON:
1315       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1316              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1317              D3D12_BARRIER_ACCESS_COPY_DEST |
1318              D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1319    case D3D12_BARRIER_LAYOUT_GENERIC_READ:
1320    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ:
1321       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1322              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1323              D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1324              D3D12_BARRIER_ACCESS_RESOLVE_SOURCE |
1325              D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1326    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ:
1327       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE|
1328              D3D12_BARRIER_ACCESS_COPY_SOURCE;
1329    case D3D12_BARRIER_LAYOUT_RENDER_TARGET:
1330       return D3D12_BARRIER_ACCESS_RENDER_TARGET;
1331    case D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS:
1332    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS:
1333    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS:
1334       return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1335    case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE:
1336       return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1337    case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ:
1338       return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1339    case D3D12_BARRIER_LAYOUT_SHADER_RESOURCE:
1340    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE:
1341    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE:
1342       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1343    case D3D12_BARRIER_LAYOUT_COPY_SOURCE:
1344    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE:
1345    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE:
1346       return D3D12_BARRIER_ACCESS_COPY_SOURCE;
1347    case D3D12_BARRIER_LAYOUT_COPY_DEST:
1348    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST:
1349    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST:
1350       return D3D12_BARRIER_ACCESS_COPY_DEST;
1351    case D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE:
1352       return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1353    case D3D12_BARRIER_LAYOUT_RESOLVE_DEST:
1354       return D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1355    case D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE:
1356       return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1357    default:
1358       return D3D12_BARRIER_ACCESS_COMMON;
1359    }
1360 }
1361 
1362 /* Similar to layout -> access, there's access -> sync validation too. D3D
1363  * doesn't like over-synchronizing if you weren't accessing a resource through
1364  * a relevant access bit. */
1365 static D3D12_BARRIER_SYNC
adjust_sync_for_access(D3D12_BARRIER_SYNC in,D3D12_BARRIER_ACCESS access)1366 adjust_sync_for_access(D3D12_BARRIER_SYNC in, D3D12_BARRIER_ACCESS access)
1367 {
1368    /* NO_ACCESS must not add sync */
1369    if (access == D3D12_BARRIER_ACCESS_NO_ACCESS)
1370       return D3D12_BARRIER_SYNC_NONE;
1371    /* SYNC_ALL can be used with any access bits */
1372    if (in == D3D12_BARRIER_SYNC_ALL)
1373       return in;
1374    /* ACCESS_COMMON needs at least one sync bit */
1375    if (access == D3D12_BARRIER_ACCESS_COMMON)
1376       return in == D3D12_BARRIER_SYNC_NONE ? D3D12_BARRIER_SYNC_ALL : in;
1377 
1378    D3D12_BARRIER_SYNC out = D3D12_BARRIER_SYNC_NONE;
1379    if (access & D3D12_BARRIER_ACCESS_VERTEX_BUFFER)
1380       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1381                    D3D12_BARRIER_SYNC_DRAW |
1382                    D3D12_BARRIER_SYNC_ALL_SHADING |
1383                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1384    if (access & D3D12_BARRIER_ACCESS_CONSTANT_BUFFER)
1385       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1386                    D3D12_BARRIER_SYNC_PIXEL_SHADING |
1387                    D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1388                    D3D12_BARRIER_SYNC_DRAW |
1389                    D3D12_BARRIER_SYNC_ALL_SHADING |
1390                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1391    if (access & D3D12_BARRIER_ACCESS_INDEX_BUFFER)
1392       out |= in & D3D12_BARRIER_SYNC_INDEX_INPUT;
1393    if (access & D3D12_BARRIER_ACCESS_RENDER_TARGET)
1394       out |= in & D3D12_BARRIER_SYNC_RENDER_TARGET;
1395    if (access & D3D12_BARRIER_ACCESS_UNORDERED_ACCESS)
1396       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1397                    D3D12_BARRIER_SYNC_PIXEL_SHADING |
1398                    D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1399                    D3D12_BARRIER_SYNC_DRAW |
1400                    D3D12_BARRIER_SYNC_ALL_SHADING |
1401                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1402    if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE)
1403       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1404                    D3D12_BARRIER_SYNC_DEPTH_STENCIL);
1405    if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ)
1406       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1407                    D3D12_BARRIER_SYNC_DEPTH_STENCIL);
1408    if (access & D3D12_BARRIER_ACCESS_SHADER_RESOURCE)
1409       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1410                    D3D12_BARRIER_SYNC_PIXEL_SHADING |
1411                    D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1412                    D3D12_BARRIER_SYNC_DRAW |
1413                    D3D12_BARRIER_SYNC_ALL_SHADING |
1414                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1415    if (access & D3D12_BARRIER_ACCESS_STREAM_OUTPUT)
1416       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1417                    D3D12_BARRIER_SYNC_DRAW |
1418                    D3D12_BARRIER_SYNC_ALL_SHADING |
1419                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1420    if (access & D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT)
1421       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1422                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
1423    if (access & D3D12_BARRIER_ACCESS_PREDICATION)
1424       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1425                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
1426    if (access & (D3D12_BARRIER_ACCESS_COPY_DEST | D3D12_BARRIER_ACCESS_COPY_SOURCE))
1427       out |= in & D3D12_BARRIER_SYNC_COPY;
1428    if (access & (D3D12_BARRIER_ACCESS_RESOLVE_DEST | D3D12_BARRIER_ACCESS_RESOLVE_SOURCE))
1429       out |= in & D3D12_BARRIER_SYNC_RESOLVE;
1430    if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ)
1431       out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1432                    D3D12_BARRIER_SYNC_RAYTRACING |
1433                    D3D12_BARRIER_SYNC_ALL_SHADING |
1434                    D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
1435                    D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
1436                    D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
1437                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1438    if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE)
1439       out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1440                    D3D12_BARRIER_SYNC_RAYTRACING |
1441                    D3D12_BARRIER_SYNC_ALL_SHADING |
1442                    D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
1443                    D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
1444                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1445    if (access & D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE)
1446       out |= in & (D3D12_BARRIER_SYNC_PIXEL_SHADING |
1447                    D3D12_BARRIER_SYNC_ALL_SHADING);
1448    /* SYNC_NONE means it won't be accessed, so if we can't express the app's original intent
1449     * here, then be conservative and over-sync. */
1450    return out ? out : D3D12_BARRIER_SYNC_ALL;
1451 }
1452 
/* vkCmdPipelineBarrier2() implementation used when the device supports D3D12
 * enhanced barriers. Translates the Vulkan memory/buffer/image barriers in
 * *info into up to three D3D12_BARRIER_GROUPs (global, buffer, texture) and
 * submits them in a single ID3D12GraphicsCommandList8::Barrier() call. */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2_enhanced(VkCommandBuffer commandBuffer,
                                 const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* At most one group per barrier type: global, buffer, texture. */
   uint32_t num_barrier_groups = 0;
   D3D12_BARRIER_GROUP groups[3];

   /* Some input image barriers will expand into 2 outputs, and some will turn into buffer barriers.
    * Do a first pass and count how much we need to allocate. */
   uint32_t num_image_barriers = 0;
   uint32_t num_buffer_barriers = info->bufferMemoryBarrierCount;
   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
      VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);
      /* Mixed read-only/writable depth-stencil layouts need one barrier per
       * aspect, since the D3D layout differs between the two aspects. */
      bool need_separate_aspect_barriers =
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
      if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
         ++num_buffer_barriers;
      else
         num_image_barriers += need_separate_aspect_barriers ? 2 : 1;
   }

   /* One allocation for all three barrier arrays; freed at the end. */
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, D3D12_GLOBAL_BARRIER, global_barriers, info->memoryBarrierCount);
   VK_MULTIALLOC_DECL(&ma, D3D12_BUFFER_BARRIER, buffer_barriers, num_buffer_barriers);
   VK_MULTIALLOC_DECL(&ma, D3D12_TEXTURE_BARRIER, texture_barriers, num_image_barriers);

   /* Nothing to translate. */
   if (ma.size == 0)
      return;

   if (!vk_multialloc_alloc(&ma, &cmdbuf->vk.pool->alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   /* VkMemoryBarrier2 -> D3D12_GLOBAL_BARRIER. */
   if (info->memoryBarrierCount) {
      groups[num_barrier_groups].NumBarriers = info->memoryBarrierCount;
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_GLOBAL;
      groups[num_barrier_groups].pGlobalBarriers = global_barriers;
      ++num_barrier_groups;
      for (uint32_t i = 0; i < info->memoryBarrierCount; ++i) {
         global_barriers[i].SyncBefore = translate_sync(info->pMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         global_barriers[i].SyncAfter = translate_sync(info->pMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         /* SYNC_NONE forces the matching access to NO_ACCESS (D3D requirement). */
         global_barriers[i].AccessBefore = global_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access;
         global_barriers[i].AccessAfter = global_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access;

         if ((global_barriers[i].AccessBefore & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE) &&
             (global_barriers[i].AccessAfter == D3D12_BARRIER_ACCESS_COMMON ||
              global_barriers[i].AccessAfter & ~(D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ))) {
            /* D3D validates against a global barrier attempting to transition from depth write to something other than depth write,
             * but this is a D3D bug; it's absolutely valid to use a global barrier to transition *multiple* types of accesses.
             * The validation does say that you'd need an image barrier to actually get that kind of transition, which is still correct,
             * so just remove this bit under the assumption that a dedicated image barrier will be submitted to do any necessary work later. */
            global_barriers[i].AccessBefore &= ~D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
         }
         /* ACCESS_COMMON on the source side implies ACCESS_COMMON on the dest side. */
         if (global_barriers[i].AccessBefore == D3D12_BARRIER_ACCESS_COMMON)
            global_barriers[i].AccessAfter = D3D12_BARRIER_ACCESS_COMMON;
         /* Scope the sync bits down to what the final access bits allow. */
         global_barriers[i].SyncBefore = adjust_sync_for_access(global_barriers[i].SyncBefore, global_barriers[i].AccessBefore);
         global_barriers[i].SyncAfter = adjust_sync_for_access(global_barriers[i].SyncAfter, global_barriers[i].AccessAfter);
      }
   }

   /* VkBufferMemoryBarrier2 -> D3D12_BUFFER_BARRIER. The group is sized for
    * the extra entries added later by linear-image barriers. */
   if (num_buffer_barriers) {
      groups[num_barrier_groups].NumBarriers = num_buffer_barriers;
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_BUFFER;
      groups[num_barrier_groups].pBufferBarriers = buffer_barriers;
      ++num_barrier_groups;
      for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; ++i) {
         VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
         buffer_barriers[i].SyncBefore = translate_sync(info->pBufferMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         buffer_barriers[i].SyncAfter = translate_sync(info->pBufferMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         buffer_barriers[i].AccessBefore = buffer_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pBufferMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & buf->valid_access;
         buffer_barriers[i].AccessAfter = buffer_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pBufferMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & buf->valid_access;
         buffer_barriers[i].SyncBefore = adjust_sync_for_access(buffer_barriers[i].SyncBefore, buffer_barriers[i].AccessBefore);
         buffer_barriers[i].SyncAfter = adjust_sync_for_access(buffer_barriers[i].SyncAfter, buffer_barriers[i].AccessAfter);
         /* Barrier the whole resource, ignoring the Vulkan offset/size range. */
         buffer_barriers[i].pResource = buf->res;
         buffer_barriers[i].Offset = 0;
         buffer_barriers[i].Size = UINT64_MAX;
      }
   }

   if (num_image_barriers) {
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_TEXTURE;
      groups[num_barrier_groups].pTextureBarriers = texture_barriers;
      groups[num_barrier_groups].NumBarriers = num_image_barriers;
      ++num_barrier_groups;
   }

   /* Second pass over the image barriers: fill texture barriers, and append
    * buffer barriers for linear images after the real buffer barriers. */
   uint32_t tbar = 0;
   uint32_t bbar = info->bufferMemoryBarrierCount;
   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
      VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);

      if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) {
         /* Barriers on linear images turn into buffer barriers */
         buffer_barriers[bbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         buffer_barriers[bbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         buffer_barriers[bbar].AccessBefore = buffer_barriers[bbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & image->valid_access;
         buffer_barriers[bbar].AccessAfter = buffer_barriers[bbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & image->valid_access;
         buffer_barriers[bbar].SyncBefore = adjust_sync_for_access(buffer_barriers[bbar].SyncBefore, buffer_barriers[bbar].AccessBefore);
         buffer_barriers[bbar].SyncAfter = adjust_sync_for_access(buffer_barriers[bbar].SyncAfter, buffer_barriers[bbar].AccessAfter);
         buffer_barriers[bbar].pResource = image->res;
         buffer_barriers[bbar].Offset = 0;
         buffer_barriers[bbar].Size = UINT64_MAX;
         ++bbar;
         continue;
      }

      const VkImageSubresourceRange *range = &info->pImageMemoryBarriers[i].subresourceRange;
      const bool simultaneous_access = image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;

      /* Must match the expansion counted in the first pass above. */
      bool need_separate_aspect_barriers =
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
      uint32_t num_aspects = need_separate_aspect_barriers ? 2 : 1;
      /* When splitting per aspect: barrier 0 covers color/depth, barrier 1
       * covers stencil; otherwise one barrier covers the whole aspect mask. */
      VkImageAspectFlags aspect_0_mask = need_separate_aspect_barriers ?
         (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) : VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM;
      VkImageAspectFlags aspects[] = {
         range->aspectMask & aspect_0_mask,
         range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT,
      };

      for (uint32_t aspect_idx = 0; aspect_idx < num_aspects; ++aspect_idx) {
         VkImageAspectFlags aspect = aspects[aspect_idx];
         texture_barriers[tbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         texture_barriers[tbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         const bool queue_ownership_transfer = info->pImageMemoryBarriers[i].srcQueueFamilyIndex != info->pImageMemoryBarriers[i].dstQueueFamilyIndex;
         /* Masks used to scope the access bits to what each layout permits;
          * ~0 means "no layout-based restriction". */
         D3D12_BARRIER_ACCESS layout_before_valid_access = ~0;
         D3D12_BARRIER_ACCESS layout_after_valid_access = ~0;
         if (simultaneous_access) {
            /* Simultaneous access textures never perform layout transitions, and can do any type of access from COMMON layout */
            texture_barriers[tbar].LayoutAfter = texture_barriers[tbar].LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED;
         } else if (queue_ownership_transfer) {
            /* For an ownership transfer, force the foreign layout to COMMON and the matching sync/access to NONE */
            assert(info->pImageMemoryBarriers[i].srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
            assert(info->pImageMemoryBarriers[i].dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
            const bool is_release = info->pImageMemoryBarriers[i].srcQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
            const bool is_acquire = info->pImageMemoryBarriers[i].dstQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
            assert(is_release ^ is_acquire);
            texture_barriers[tbar].LayoutBefore = is_acquire ?
               D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
            texture_barriers[tbar].LayoutAfter = is_release ?
               D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
            if (is_acquire) {
               texture_barriers[tbar].SyncBefore = D3D12_BARRIER_SYNC_NONE;
               texture_barriers[tbar].AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
               layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
            } else {
               texture_barriers[tbar].SyncAfter = D3D12_BARRIER_SYNC_NONE;
               texture_barriers[tbar].AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
               layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
            }
         } else {
            texture_barriers[tbar].LayoutBefore = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
            texture_barriers[tbar].LayoutAfter = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
            layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
            layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
         }

         /* NOTE: for the ownership-transfer cases the AccessBefore/After set
          * above may be overwritten here; the SYNC_NONE / UNDEFINED-layout
          * conditions reproduce the same NO_ACCESS values. */
         texture_barriers[tbar].AccessBefore = texture_barriers[tbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ||
                                                texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].srcAccessMask) &
               cmdbuf->valid_access & image->valid_access & layout_before_valid_access;
         texture_barriers[tbar].AccessAfter = texture_barriers[tbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].dstAccessMask) &
               cmdbuf->valid_access & image->valid_access & layout_after_valid_access;

         texture_barriers[tbar].SyncBefore = adjust_sync_for_access(texture_barriers[tbar].SyncBefore, texture_barriers[tbar].AccessBefore);
         texture_barriers[tbar].SyncAfter = adjust_sync_for_access(texture_barriers[tbar].SyncAfter, texture_barriers[tbar].AccessAfter);
         texture_barriers[tbar].Subresources.FirstArraySlice = range->baseArrayLayer;
         texture_barriers[tbar].Subresources.NumArraySlices = dzn_get_layer_count(image, range);
         texture_barriers[tbar].Subresources.IndexOrFirstMipLevel = range->baseMipLevel;
         texture_barriers[tbar].Subresources.NumMipLevels = dzn_get_level_count(image, range);
         texture_barriers[tbar].Subresources.FirstPlane = aspect_idx;
         texture_barriers[tbar].Subresources.NumPlanes = util_bitcount(aspect);
         texture_barriers[tbar].pResource = image->res;
         texture_barriers[tbar].Flags = D3D12_TEXTURE_BARRIER_FLAG_NONE;
         /* Transition out of UNDEFINED discards the previous contents. */
         if (texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED)
            texture_barriers[tbar].Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD;
         ++tbar;
      }
   }
   assert(bbar == num_buffer_barriers);
   assert(tbar == num_image_barriers);

   ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, num_barrier_groups, groups);

   vk_free(&cmdbuf->vk.pool->alloc, global_barriers);
}
1663 
/* Return a CPU descriptor handle for a depth-stencil view of image/desc,
 * creating and caching it in cmdbuf->dsvs on first use. Cached entries are
 * keyed on (image pointer, view desc) and live as long as the command
 * buffer's dsvs hash table / descriptor pool. */
static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
                       const struct dzn_image *image,
                       const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_dsv_key key = { image, *desc };
   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
   struct dzn_cmd_buffer_dsv_entry *dsve;

   if (!he) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;

      // TODO: error handling (vk_alloc and slot allocation results are not
      // checked; an OOM here would dereference NULL)
      dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      dsve->key = key;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
      dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
      ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
      /* Key the table on the copy stored in the entry so the key outlives
       * this stack frame. */
      _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
   } else {
      dsve = he->data;
   }

   return dsve->handle;
}
1692 
1693 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_RENDER_TARGET_VIEW_DESC * desc)1694 dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
1695                        const struct dzn_image *image,
1696                        const D3D12_RENDER_TARGET_VIEW_DESC *desc)
1697 {
1698    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1699    struct dzn_cmd_buffer_rtv_key key = { image, *desc };
1700    struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
1701    struct dzn_cmd_buffer_rtv_entry *rtve;
1702 
1703    if (!he) {
1704       struct dzn_descriptor_heap *heap;
1705       uint32_t slot;
1706 
1707       // TODO: error handling
1708       rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
1709                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1710       rtve->key = key;
1711       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1712       rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1713       ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
1714       he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
1715    } else {
1716       rtve = he->data;
1717    }
1718 
1719    return rtve->handle;
1720 }
1721 
1722 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer * cmdbuf)1723 dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
1724 {
1725    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1726 
1727    if (!cmdbuf->null_rtv.ptr) {
1728       struct dzn_descriptor_heap *heap;
1729       uint32_t slot;
1730       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1731       cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1732 
1733       D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
1734       desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
1735       desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
1736       desc.Texture2D.MipSlice = 0;
1737       desc.Texture2D.PlaneSlice = 0;
1738 
1739       ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
1740    }
1741 
1742    return cmdbuf->null_rtv;
1743 }
1744 
1745 static D3D12_HEAP_TYPE
heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)1746 heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)
1747 {
1748    switch (bucket) {
1749    case DZN_INTERNAL_BUF_UPLOAD: return D3D12_HEAP_TYPE_UPLOAD;
1750    case DZN_INTERNAL_BUF_DEFAULT: return D3D12_HEAP_TYPE_DEFAULT;
1751    default: unreachable("Invalid value");
1752    }
1753 }
1754 
1755 static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer * cmdbuf,uint32_t size,enum dzn_internal_buf_bucket bucket,D3D12_RESOURCE_STATES init_state,uint64_t align,ID3D12Resource ** out,uint64_t * offset)1756 dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
1757                                   uint32_t size,
1758                                   enum dzn_internal_buf_bucket bucket,
1759                                   D3D12_RESOURCE_STATES init_state,
1760                                   uint64_t align,
1761                                   ID3D12Resource **out,
1762                                   uint64_t *offset)
1763 {
1764    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1765    ID3D12Resource *res;
1766    *out = NULL;
1767    D3D12_HEAP_TYPE heap_type = heap_type_for_bucket(bucket);
1768 
1769    if (bucket == DZN_INTERNAL_BUF_UPLOAD && cmdbuf->cur_upload_buf) {
1770       uint64_t new_offset = ALIGN_POT(cmdbuf->cur_upload_buf_offset, align);
1771       if (cmdbuf->cur_upload_buf->size >= size + new_offset) {
1772          cmdbuf->cur_upload_buf_offset = new_offset + size;
1773          *out = cmdbuf->cur_upload_buf->res;
1774          *offset = new_offset;
1775          return VK_SUCCESS;
1776       }
1777       cmdbuf->cur_upload_buf = NULL;
1778       cmdbuf->cur_upload_buf_offset = 0;
1779    }
1780 
1781    uint32_t alloc_size = size;
1782    if (bucket == DZN_INTERNAL_BUF_UPLOAD)
1783       /* Walk through a 4MB upload buffer */
1784       alloc_size = ALIGN_POT(size, 4 * 1024 * 1024);
1785    else
1786       /* Align size on 64k (the default alignment) */
1787       alloc_size = ALIGN_POT(size, 64 * 1024);
1788 
1789    D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_type);
1790    D3D12_RESOURCE_DESC rdesc = {
1791       .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
1792       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
1793       .Width = alloc_size,
1794       .Height = 1,
1795       .DepthOrArraySize = 1,
1796       .MipLevels = 1,
1797       .Format = DXGI_FORMAT_UNKNOWN,
1798       .SampleDesc = { .Count = 1, .Quality = 0 },
1799       .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
1800       .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
1801    };
1802 
1803    HRESULT hres =
1804       ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
1805                                             D3D12_HEAP_FLAG_NONE, &rdesc,
1806                                             init_state, NULL,
1807                                             &IID_ID3D12Resource,
1808                                             (void **)&res);
1809    if (FAILED(hres)) {
1810       return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1811    }
1812 
1813    struct dzn_internal_resource *entry =
1814       vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
1815                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1816    if (!entry) {
1817       ID3D12Resource_Release(res);
1818       return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1819    }
1820 
1821    entry->res = res;
1822    entry->size = alloc_size;
1823    list_addtail(&entry->link, &cmdbuf->internal_bufs[bucket]);
1824    *out = entry->res;
1825    if (offset)
1826       *offset = 0;
1827    if (bucket == DZN_INTERNAL_BUF_UPLOAD) {
1828       cmdbuf->cur_upload_buf = entry;
1829       cmdbuf->cur_upload_buf_offset = size;
1830    }
1831    return VK_SUCCESS;
1832 }
1833 
/* Clear the given rects of a color image by copying CPU-prepared data
 * over them.
 *
 * Fallback for clears whose color can't go through
 * ClearRenderTargetView() (see the clear_with_cpy logic in
 * dzn_cmd_buffer_clear_attachment()): the clear color is encoded into the
 * image format on the CPU with util_format_write_4(), an upload buffer is
 * tiled with the encoded texel, and each rect is overwritten with
 * CopyTextureRegion().
 *
 * `layout` is the layout the subresources are in on entry; with legacy
 * barriers they are transitioned to TRANSFER_DST around the copies and
 * back to `layout` afterwards. Only single-mip ranges are supported
 * (asserted below).
 */
static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                     const struct dzn_image *image,
                                     VkImageLayout layout,
                                     const VkClearColorValue *color,
                                     const VkImageSubresourceRange *range,
                                     uint32_t rect_count, D3D12_RECT *rects)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode one texel worth of clear color into raw[]. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* Smallest multiple of the D3D12 row-pitch alignment that is also a
    * multiple of the block size, so rows tile with whole texels.
    */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
   uint32_t res_size = max_h * row_pitch;

   assert(fill_step <= sizeof(buf));

   /* Replicate the texel across one fill_step-sized staging chunk. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the upload buffer with the replicated clear pattern. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   /* The footprint Format is filled in per-destination inside the loop
    * below, since it depends on the destination copy location type.
    */
   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
         .Footprint = {
            .Width = max_w,
            .Height = max_h,
            .Depth = 1,
            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
         },
      },
   };

   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }

   assert(dzn_get_level_count(image, range) == 1);
   uint32_t layer_count = dzn_get_layer_count(image, range);

   dzn_foreach_aspect(aspect, range->aspectMask) {
      VkImageSubresourceLayers subres = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .mipLevel = range->baseMipLevel,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = layer_count,
      };

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         D3D12_TEXTURE_COPY_LOCATION dst_loc =
            dzn_image_get_copy_loc(image, &subres, aspect, layer);

         src_loc.PlacedFootprint.Footprint.Format =
            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
            dst_loc.PlacedFootprint.Footprint.Format :
            image->desc.Format;

         for (uint32_t r = 0; r < rect_count; r++) {
            /* Source region starts at (0, 0): the staging data is a
             * uniform pattern, only the extent has to match the rect.
             */
            D3D12_BOX src_box = {
               .left = 0,
               .top = 0,
               .front = 0,
               .right = (UINT)(rects[r].right - rects[r].left),
               .bottom = (UINT)(rects[r].bottom - rects[r].top),
               .back = 1,
            };

            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
                                                         &dst_loc,
                                                         rects[r].left,
                                                         rects[r].top, 0,
                                                         &src_loc,
                                                         &src_box);
         }
      }
   }

   /* Return the range to the caller-provided layout. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
1957 
1958 static VkClearColorValue
adjust_clear_color(struct dzn_physical_device * pdev,VkFormat format,const VkClearColorValue * col)1959 adjust_clear_color(struct dzn_physical_device *pdev,
1960                    VkFormat format, const VkClearColorValue *col)
1961 {
1962    VkClearColorValue out = *col;
1963 
1964    // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
1965    // manually where it matters, like here, in the clear path.
1966    if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
1967       if (pdev->support_a4b4g4r4) {
1968          DZN_SWAP(float, out.float32[0], out.float32[2]);
1969       } else {
1970          DZN_SWAP(float, out.float32[0], out.float32[1]);
1971          DZN_SWAP(float, out.float32[2], out.float32[3]);
1972       }
1973    }
1974 
1975    return out;
1976 }
1977 
/* Clear whole subresource ranges of a color image with buffer-to-image
 * copies.
 *
 * Fallback used by dzn_cmd_buffer_clear_color() when the image can't be
 * cleared with ClearRenderTargetView(): the image isn't renderable, the
 * command list isn't a direct one, or the integer clear color doesn't
 * survive the float round-trip that RTV clears imply. The clear color is
 * encoded into the image format on the CPU, replicated into an upload
 * buffer sized for the largest requested mip, and every level/layer of
 * every range is overwritten with CopyTextureRegion().
 */
static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                      const struct dzn_image *image,
                                      VkImageLayout layout,
                                      const VkClearColorValue *color,
                                      uint32_t range_count,
                                      const VkImageSubresourceRange *ranges)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode one texel worth of clear color into raw[]. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* Smallest multiple of the D3D12 row-pitch alignment that is also a
    * multiple of the block size, so rows tile with whole texels.
    */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   /* Size the staging buffer for the biggest base mip among the ranges;
    * smaller mips reuse a subset of it.
    */
   uint32_t res_size = 0;
   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);

      res_size = MAX2(res_size, h * d * row_pitch);
   }

   assert(fill_step <= sizeof(buf));

   /* Replicate the texel across one fill_step-sized staging chunk. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the upload buffer with the replicated clear pattern. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   /* The footprint extent/pitch/format are filled in per-subresource in
    * the loops below.
    */
   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
      },
   };

   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);

      /* Legacy barriers: move the range to TRANSFER_DST for the copies. */
      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            layout,
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
            VkImageSubresourceLayers subres = {
               .aspectMask = (VkImageAspectFlags)aspect,
               .mipLevel = ranges[r].baseMipLevel + lvl,
               .baseArrayLayer = ranges[r].baseArrayLayer,
               .layerCount = layer_count,
            };

            for (uint32_t layer = 0; layer < layer_count; layer++) {
               D3D12_TEXTURE_COPY_LOCATION dst_loc =
                  dzn_image_get_copy_loc(image, &subres, aspect, layer);

               src_loc.PlacedFootprint.Footprint.Format =
                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
                  dst_loc.PlacedFootprint.Footprint.Format :
                  image->desc.Format;
               src_loc.PlacedFootprint.Footprint.Width = w;
               src_loc.PlacedFootprint.Footprint.Height = h;
               src_loc.PlacedFootprint.Footprint.Depth = d;
               src_loc.PlacedFootprint.Footprint.RowPitch =
                  ALIGN_NPOT(w * blksize, fill_step);
               D3D12_BOX src_box = {
                  .left = 0,
                  .top = 0,
                  .front = 0,
                  .right = w,
                  .bottom = h,
                  .back = d,
               };

               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
                                                  &src_loc, &src_box);

            }
         }
      }

      /* Return the range to the caller-provided layout. */
      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2106 
2107 static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer * cmdbuf,struct dzn_image_view * view,VkImageLayout layout,const VkClearValue * value,VkImageAspectFlags aspects,uint32_t base_layer,uint32_t layer_count,uint32_t rect_count,D3D12_RECT * rects)2108 dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
2109                                 struct dzn_image_view *view,
2110                                 VkImageLayout layout,
2111                                 const VkClearValue *value,
2112                                 VkImageAspectFlags aspects,
2113                                 uint32_t base_layer,
2114                                 uint32_t layer_count,
2115                                 uint32_t rect_count,
2116                                 D3D12_RECT *rects)
2117 {
2118    struct dzn_image *image =
2119       container_of(view->vk.image, struct dzn_image, vk);
2120    struct dzn_physical_device *pdev =
2121       container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2122 
2123    VkImageSubresourceRange range = {
2124       .aspectMask = aspects,
2125       .baseMipLevel = view->vk.base_mip_level,
2126       .levelCount = 1,
2127       .baseArrayLayer = view->vk.base_array_layer + base_layer,
2128       .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
2129                     view->vk.layer_count - base_layer : layer_count,
2130    };
2131 
2132    layer_count = vk_image_subresource_layer_count(&image->vk, &range);
2133    D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;
2134 
2135    if (vk_format_is_depth_or_stencil(view->vk.format)) {
2136       D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
2137 
2138       if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
2139          flags |= D3D12_CLEAR_FLAG_DEPTH;
2140       if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
2141          flags |= D3D12_CLEAR_FLAG_STENCIL;
2142 
2143       if (flags != 0) {
2144          if (cmdbuf->enhanced_barriers) {
2145             restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
2146                                                            layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
2147                                                            &range);
2148          } else {
2149             dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
2150                                                                layout,
2151                                                                VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
2152                                                                DZN_QUEUE_TRANSITION_FLUSH);
2153          }
2154 
2155          D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
2156          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
2157          ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
2158                                                 value->depthStencil.depth,
2159                                                 value->depthStencil.stencil,
2160                                                 rect_count, rects);
2161 
2162          if (cmdbuf->enhanced_barriers) {
2163             dzn_cmd_buffer_restore_layout(cmdbuf, image,
2164                                           D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
2165                                           D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
2166                                           &range);
2167          } else {
2168             dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
2169                                                                VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
2170                                                                layout,
2171                                                                DZN_QUEUE_TRANSITION_FLUSH);
2172          }
2173       }
2174    } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
2175       VkClearColorValue color = adjust_clear_color(pdev, view->vk.format, &value->color);
2176       bool clear_with_cpy = false;
2177       float vals[4];
2178 
2179       if (vk_format_is_sint(view->vk.format)) {
2180          for (uint32_t i = 0; i < 4; i++) {
2181             vals[i] = color.int32[i];
2182             if (color.int32[i] != (int32_t)vals[i]) {
2183                clear_with_cpy = true;
2184                break;
2185             }
2186          }
2187       } else if (vk_format_is_uint(view->vk.format)) {
2188          for (uint32_t i = 0; i < 4; i++) {
2189             vals[i] = color.uint32[i];
2190             if (color.uint32[i] != (uint32_t)vals[i]) {
2191                clear_with_cpy = true;
2192                break;
2193             }
2194          }
2195       } else {
2196          for (uint32_t i = 0; i < 4; i++)
2197             vals[i] = color.float32[i];
2198       }
2199 
2200       if (clear_with_cpy) {
2201          dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
2202                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2203                                               &value->color,
2204                                               &range, rect_count, rects);
2205       } else {
2206          if (cmdbuf->enhanced_barriers) {
2207             restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
2208                                                            layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
2209                                                            &range);
2210          } else {
2211             dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
2212                                                                layout,
2213                                                                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2214                                                                DZN_QUEUE_TRANSITION_FLUSH);
2215          }
2216 
2217          D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
2218          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
2219          ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);
2220 
2221          if (cmdbuf->enhanced_barriers) {
2222             dzn_cmd_buffer_restore_layout(cmdbuf, image,
2223                                           D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
2224                                           D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
2225                                           &range);
2226          } else {
2227             dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
2228                                                                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2229                                                                layout,
2230                                                                DZN_QUEUE_TRANSITION_FLUSH);
2231          }
2232       }
2233    }
2234 }
2235 
/* Clear color-image subresource ranges (vkCmdClearColorImage path).
 *
 * Uses ClearRenderTargetView() when the image is renderable and the
 * command list is a direct (graphics) one; otherwise — or when an
 * integer clear component doesn't survive the float round-trip RTV
 * clears imply — defers to dzn_cmd_buffer_clear_ranges_with_copy().
 */
static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
                           const struct dzn_image *image,
                           VkImageLayout layout,
                           const VkClearColorValue *col,
                           uint32_t range_count,
                           const VkImageSubresourceRange *ranges)
{
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
   /* RTV clears need a renderable image and a direct command list. */
   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ||
       cmdbuf->type != D3D12_COMMAND_LIST_TYPE_DIRECT) {
      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
      return;
   }

   VkClearColorValue color = adjust_clear_color(pdev, image->vk.format, col);
   float clear_vals[4];

   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   /* Integer formats: ClearRenderTargetView() takes floats, so bail to
    * the copy path if any component loses precision in the conversion.
    */
   if (util_format_is_pure_sint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.int32[c];
         if (color.int32[c] != (int32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else if (util_format_is_pure_uint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.uint32[c];
         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else {
      memcpy(clear_vals, color.float32, sizeof(clear_vals));
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      /* Bring the range into a render-target layout for the clear. */
      if (cmdbuf->enhanced_barriers) {
         restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                        layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                        range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         VkImageSubresourceRange view_range = *range;

         /* 3D images are viewed as arrays of the mip's depth slices. */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            view_range.baseArrayLayer = 0;
            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
      }

      /* Put the range back in the caller's layout. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                       D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                       D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                       range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2319 
2320 static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearDepthStencilValue * zs,uint32_t range_count,const VkImageSubresourceRange * ranges)2321 dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
2322                         const struct dzn_image *image,
2323                         VkImageLayout layout,
2324                         const VkClearDepthStencilValue *zs,
2325                         uint32_t range_count,
2326                         const VkImageSubresourceRange *ranges)
2327 {
2328    assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
2329 
2330    for (uint32_t r = 0; r < range_count; r++) {
2331       const VkImageSubresourceRange *range = &ranges[r];
2332       uint32_t level_count = dzn_get_level_count(image, range);
2333 
2334       D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
2335       D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;
2336 
2337       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
2338          flags |= D3D12_CLEAR_FLAG_DEPTH;
2339       if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
2340          flags |= D3D12_CLEAR_FLAG_STENCIL;
2341 
2342       if (cmdbuf->enhanced_barriers) {
2343          restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
2344                                                         layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
2345                                                         range);
2346       } else {
2347          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
2348                                                             layout,
2349                                                             VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
2350                                                             DZN_QUEUE_TRANSITION_FLUSH);
2351       }
2352 
2353       for (uint32_t lvl = 0; lvl < level_count; lvl++) {
2354          D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
2355          D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
2356          ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
2357                                                           handle, flags,
2358                                                           zs->depth,
2359                                                           zs->stencil,
2360                                                           0, NULL);
2361       }
2362 
2363       if (cmdbuf->enhanced_barriers) {
2364          dzn_cmd_buffer_restore_layout(cmdbuf, image,
2365                                        D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
2366                                        D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
2367                                        range);
2368       } else {
2369          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
2370                                                             VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
2371                                                             layout,
2372                                                             DZN_QUEUE_TRANSITION_FLUSH);
2373       }
2374    }
2375 }
2376 
/* Record one buffer-to-image copy region of a VkCopyBufferToImageInfo2
 * for a single aspect and layer.
 *
 * If the buffer footprint satisfies D3D12's placement constraints
 * (dzn_buffer_supports_region_copy()), a single CopyTextureRegion()
 * covers the whole region; otherwise the region is copied one block-row
 * at a time with per-line source footprints.
 */
static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyBufferToImageInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
   VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
    * to not be block aligned if it's reaching the image boundary, offsets still
    * have to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(dst_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
      dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(pdev, &src_buf_loc)) {
      /* RowPitch and Offset are properly aligned, we can copy
       * the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = 0,
         .top = 0,
         .front = 0,
         .right = region.imageExtent.width,
         .bottom = region.imageExtent.height,
         .back = region.imageExtent.depth,
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
                                                   region.imageOffset.x,
                                                   region.imageOffset.y,
                                                   region.imageOffset.z,
                                                   &src_buf_loc, &src_box);
      return;
   }

   /* Copy line-by-line if things are not properly aligned. One
    * CopyTextureRegion() per block-row (blkh/blkd steps cover compressed
    * formats whose blocks span several texel rows/slices).
    */
   D3D12_BOX src_box = {
      .top = 0,
      .front = 0,
      .bottom = blkh,
      .back = blkd,
   };

   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t src_x;

         /* Per-line footprint, with the residual misalignment returned
          * as an x-offset into that footprint.
          */
         D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
            dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
                                         &region, &src_buf_loc,
                                         y, z, &src_x);

         src_box.left = src_x;
         src_box.right = src_x + region.imageExtent.width;
         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_img_loc,
                                                      region.imageOffset.x,
                                                      region.imageOffset.y + y,
                                                      region.imageOffset.z + z,
                                                      &src_buf_line_loc,
                                                      &src_box);
      }
   }
}
2457 
2458 static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyImageToBufferInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)2459 dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
2460                                    const VkCopyImageToBufferInfo2 *info,
2461                                    uint32_t r,
2462                                    VkImageAspectFlagBits aspect,
2463                                    uint32_t l)
2464 {
2465    VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
2466    VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
2467    struct dzn_physical_device *pdev =
2468       container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2469 
2470    ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
2471 
2472    VkBufferImageCopy2 region = info->pRegions[r];
2473    enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
2474    uint32_t blkh = util_format_get_blockheight(pfmt);
2475    uint32_t blkd = util_format_get_blockdepth(pfmt);
2476 
2477    /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
2478     * to not be block aligned if it's reaching the image boundary, offsets still
2479     * have to be aligned. Align the image extent to make D3D12 happy.
2480     */
2481    dzn_image_align_extent(src_image, &region.imageExtent);
2482 
2483    D3D12_TEXTURE_COPY_LOCATION src_img_loc =
2484       dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
2485    D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
2486       dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);
2487 
2488    if (dzn_buffer_supports_region_copy(pdev, &dst_buf_loc)) {
2489       /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
2490        * the whole thing in one call.
2491        */
2492       D3D12_BOX src_box = {
2493          .left = (UINT)region.imageOffset.x,
2494          .top = (UINT)region.imageOffset.y,
2495          .front = (UINT)region.imageOffset.z,
2496          .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
2497          .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
2498          .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
2499       };
2500 
2501       ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
2502                                                    0, 0, 0, &src_img_loc,
2503                                                    &src_box);
2504       return;
2505    }
2506 
2507    D3D12_BOX src_box = {
2508       .left = (UINT)region.imageOffset.x,
2509       .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
2510    };
2511 
2512    /* Copy line-by-line if things are not properly aligned. */
2513    for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
2514       src_box.front = region.imageOffset.z + z;
2515       src_box.back = src_box.front + blkd;
2516 
2517       for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
2518          uint32_t dst_x;
2519 
2520          D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
2521             dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
2522                                          &region, &dst_buf_loc,
2523                                          y, z, &dst_x);
2524 
2525          src_box.top = region.imageOffset.y + y;
2526          src_box.bottom = src_box.top + blkh;
2527 
2528          ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
2529                                                       &dst_buf_line_loc,
2530                                                       dst_x, 0, 0,
2531                                                       &src_img_loc,
2532                                                       &src_box);
2533       }
2534    }
2535 }
2536 
/* Record GPU copies for one image-to-image region (vkCmdCopyImage2), for a
 * single aspect and layer. When tmp_loc->pResource is NULL the copy is done
 * directly; otherwise the data is bounced through the caller-provided staging
 * buffer (tmp_loc/tmp_desc), which is how mismatched source/destination
 * formats (e.g. compressed <-> uncompressed) are handled, since D3D12 can't
 * copy between them directly.
 */
static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
                              const VkCopyImageInfo2 *info,
                              D3D12_RESOURCE_DESC *tmp_desc,
                              D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
                              uint32_t r,
                              VkImageAspectFlagBits aspect,
                              uint32_t l)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   ID3D12Device4 *dev = device->dev;
   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   /* Work on a local copy: the extent gets block-aligned below. */
   VkImageCopy2 region = info->pRegions[r];
   dzn_image_align_extent(src, &region.extent);

   const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
   const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
   VkFormat src_format =
      dzn_image_get_plane_format(src->vk.format, aspect);
   VkFormat dst_format =
      dzn_image_get_plane_format(dst->vk.format, aspect);

   /* Per-format block dimensions, needed to rescale the footprint when the
    * two images use different block sizes.
    */
   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
   uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
   uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
   uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
   uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
   uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
   uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
   uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
   uint32_t depth = region.extent.depth;
   uint32_t dst_l = l, src_l = l;

   assert(src_subres->aspectMask == dst_subres->aspectMask);

   /* 2D<->3D copies: the layer index on the 2D side maps to a Z slice on the
    * 3D side, and each invocation copies exactly one slice.
    */
   if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
       dst->vk.image_type == VK_IMAGE_TYPE_2D) {
      assert(src_subres->layerCount == 1);
      src_l = 0;
      src_z += l;
      depth = 1;
   } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
              dst->vk.image_type == VK_IMAGE_TYPE_3D) {
      assert(dst_subres->layerCount == 1);
      dst_l = 0;
      dst_z += l;
      depth = 1;
   } else {
      assert(src_subres->layerCount == dst_subres->layerCount);
   }

   D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
   D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);

   D3D12_BOX src_box = {
      .left = (UINT)MAX2(region.srcOffset.x, 0),
      .top = (UINT)MAX2(region.srcOffset.y, 0),
      .front = (UINT)MAX2(src_z, 0),
      .right = (UINT)region.srcOffset.x + region.extent.width,
      .bottom = (UINT)region.srcOffset.y + region.extent.height,
      .back = (UINT)src_z + depth,
   };

   /* No staging buffer provided: formats are compatible, copy directly. */
   if (!tmp_loc->pResource) {
      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                   region.dstOffset.x,
                                                   region.dstOffset.y,
                                                   dst_z, &src_loc,
                                                   &src_box);
      return;
   }

   /* Pass 1: image -> staging buffer, using a footprint expressed in the
    * source format.
    */
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(pdev, src->vk.format, aspect);
   tmp_desc->Width = region.extent.width;
   tmp_desc->Height = region.extent.height;

   ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   tmp_loc->PlacedFootprint.Footprint.Depth = depth;

   /* The staging buffer is reused across regions/layers: after the first use
    * it sits in COPY_SOURCE state and must be flipped back to COPY_DEST.
    */
   if (r > 0 || l > 0) {
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                                  D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                  D3D12_RESOURCE_STATE_COPY_DEST,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }
   }

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);

   /* Flip the staging buffer to COPY_SOURCE for pass 2. */
   if (r > 0 || l > 0) {
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                                  D3D12_RESOURCE_STATE_COPY_DEST,
                                                  D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }
   }

   /* Pass 2: staging buffer -> image. Re-express the footprint in the
    * destination format, rescaling dimensions when block sizes differ.
    */
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(pdev, dst->vk.format, aspect);
   if (src_blkw != dst_blkw)
      tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
   if (src_blkh != dst_blkh)
      tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;

   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   if (src_blkd != dst_blkd) {
      tmp_loc->PlacedFootprint.Footprint.Depth =
         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
   } else {
      tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
   }

   /* Copy the whole staging footprint into the destination region. */
   D3D12_BOX tmp_box = {
      .left = 0,
      .top = 0,
      .front = 0,
      .right = tmp_loc->PlacedFootprint.Footprint.Width,
      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
   };

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                region.dstOffset.x,
                                                region.dstOffset.y,
                                                dst_z,
                                                tmp_loc, &tmp_box);
}
2689 
2690 static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer * cmdbuf,VkImage image,VkImageAspectFlagBits aspect,const VkImageSubresourceLayers * subres,struct dzn_descriptor_heap * heap,uint32_t heap_slot)2691 dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
2692                                      VkImage image,
2693                                      VkImageAspectFlagBits aspect,
2694                                      const VkImageSubresourceLayers *subres,
2695                                      struct dzn_descriptor_heap *heap,
2696                                      uint32_t heap_slot)
2697 {
2698    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2699    VK_FROM_HANDLE(dzn_image, img, image);
2700    VkImageViewCreateInfo iview_info = {
2701       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2702       .image = image,
2703       .format = img->vk.format,
2704       .subresourceRange = {
2705          .aspectMask = (VkImageAspectFlags)aspect,
2706          .baseMipLevel = subres->mipLevel,
2707          .levelCount = 1,
2708          .baseArrayLayer = subres->baseArrayLayer,
2709          .layerCount = subres->layerCount,
2710       },
2711    };
2712 
2713    switch (img->vk.image_type) {
2714    case VK_IMAGE_TYPE_1D:
2715       iview_info.viewType = img->vk.array_layers > 1 ?
2716                             VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
2717       break;
2718    case VK_IMAGE_TYPE_2D:
2719       iview_info.viewType = img->vk.array_layers > 1 ?
2720                             VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
2721       break;
2722    case VK_IMAGE_TYPE_3D:
2723       iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
2724       break;
2725    default:
2726       unreachable("Invalid type");
2727    }
2728 
2729    struct dzn_image_view iview;
2730    dzn_image_view_init(device, &iview, &iview_info);
2731    dzn_descriptor_heap_write_image_view_desc(device, heap, heap_slot, false, false, &iview);
2732    dzn_image_view_finish(&iview);
2733 
2734    D3D12_GPU_DESCRIPTOR_HANDLE handle =
2735       dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
2736    ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
2737 }
2738 
2739 static void
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer * cmdbuf,struct dzn_image * img,VkImageAspectFlagBits aspect,uint32_t level,uint32_t layer,const VkOffset3D * dst_offsets)2740 dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
2741                                      struct dzn_image *img,
2742                                      VkImageAspectFlagBits aspect,
2743                                      uint32_t level, uint32_t layer,
2744                                      const VkOffset3D *dst_offsets)
2745 {
2746    bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
2747    VkImageSubresourceRange range = {
2748       .aspectMask = (VkImageAspectFlags)aspect,
2749       .baseMipLevel = level,
2750       .levelCount = 1,
2751       .baseArrayLayer = layer,
2752       .layerCount = 1,
2753    };
2754 
2755    if (ds) {
2756       D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
2757       D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
2758       ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle);
2759 
2760       if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
2761          const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2762          if (!pdev->options.PSSpecifiedStencilRefSupported) {
2763             D3D12_RECT clear_rect = {
2764                .left = dst_offsets[0].x,
2765                .right = dst_offsets[1].x,
2766                .top = dst_offsets[0].y,
2767                .bottom = dst_offsets[1].y,
2768             };
2769             ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect);
2770          }
2771       }
2772    } else {
2773       D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
2774       D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
2775       ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, false, NULL);
2776    }
2777 }
2778 
2779 static void
dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const struct dzn_image * dst,VkImageAspectFlagBits aspect,VkFilter filter,enum dzn_blit_resolve_mode resolve_mode,uint32_t stencil_bit)2780 dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
2781                                  const struct dzn_image *src,
2782                                  const struct dzn_image *dst,
2783                                  VkImageAspectFlagBits aspect,
2784                                  VkFilter filter,
2785                                  enum dzn_blit_resolve_mode resolve_mode,
2786                                  uint32_t stencil_bit)
2787 {
2788    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2789    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2790    assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf);
2791    enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
2792    VkImageUsageFlags usage =
2793       vk_format_is_depth_or_stencil(dst->vk.format) ?
2794       VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
2795       VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
2796    struct dzn_meta_blit_key ctx_key = {
2797       .out_format = dzn_image_get_dxgi_format(pdev, dst->vk.format, usage, aspect),
2798       .samples = (uint32_t)src->vk.samples,
2799       .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
2800                         FRAG_RESULT_DEPTH :
2801                         aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
2802                         FRAG_RESULT_STENCIL :
2803                         FRAG_RESULT_DATA0),
2804       .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
2805                              util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
2806                              aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
2807                              GLSL_TYPE_FLOAT),
2808       .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
2809                                 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
2810                                 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
2811                                 GLSL_SAMPLER_DIM_3D),
2812       .src_is_array = src->vk.array_layers > 1,
2813       .resolve_mode = resolve_mode,
2814       /* Filter doesn't need to be part of the key if we're not embedding a static sampler */
2815       .linear_filter = filter == VK_FILTER_LINEAR && device->support_static_samplers,
2816       .stencil_bit = stencil_bit,
2817       .padding = 0,
2818    };
2819 
2820    const struct dzn_meta_blit *ctx =
2821       dzn_meta_blits_get_context(device, &ctx_key);
2822    assert(ctx);
2823 
2824    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2825    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) {
2826       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig;
2827       ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
2828    }
2829    ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
2830 }
2831 
2832 static void
dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const VkImageSubresourceLayers * src_subres,const VkOffset3D * src_offsets,const struct dzn_image * dst,const VkImageSubresourceLayers * dst_subres,const VkOffset3D * dst_offsets,bool normalize_src_coords)2833 dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
2834                                   const struct dzn_image *src,
2835                                   const VkImageSubresourceLayers *src_subres,
2836                                   const VkOffset3D *src_offsets,
2837                                   const struct dzn_image *dst,
2838                                   const VkImageSubresourceLayers *dst_subres,
2839                                   const VkOffset3D *dst_offsets,
2840                                   bool normalize_src_coords)
2841 {
2842    uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
2843    uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
2844    uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
2845    uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
2846 
2847    float dst_pos[4] = {
2848       (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
2849       (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
2850    };
2851 
2852    float src_pos[4] = {
2853       (float)src_offsets[0].x, (float)src_offsets[0].y,
2854       (float)src_offsets[1].x, (float)src_offsets[1].y,
2855    };
2856 
2857    if (normalize_src_coords) {
2858       src_pos[0] /= src_w;
2859       src_pos[1] /= src_h;
2860       src_pos[2] /= src_w;
2861       src_pos[3] /= src_h;
2862    }
2863 
2864    float coords[] = {
2865       dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
2866       dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
2867       dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
2868       dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
2869    };
2870 
2871    ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);
2872 
2873    D3D12_VIEWPORT vp = {
2874       .TopLeftX = 0,
2875       .TopLeftY = 0,
2876       .Width = (float)dst_w,
2877       .Height = (float)dst_h,
2878       .MinDepth = 0,
2879       .MaxDepth = 1,
2880    };
2881    ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);
2882 
2883    D3D12_RECT scissor = {
2884       .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
2885       .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
2886       .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
2887       .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
2888    };
2889    ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
2890 }
2891 
2892 static void
dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer * cmdbuf,struct dzn_image * src,VkImageLayout src_layout,const VkImageSubresourceLayers * src_subres,struct dzn_image * dst,VkImageLayout dst_layout,const VkImageSubresourceLayers * dst_subres,VkImageAspectFlagBits aspect,D3D12_BARRIER_LAYOUT * restore_src_layout,D3D12_BARRIER_LAYOUT * restore_dst_layout,bool post)2893 dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
2894                                    struct dzn_image *src, VkImageLayout src_layout,
2895                                    const VkImageSubresourceLayers *src_subres,
2896                                    struct dzn_image *dst, VkImageLayout dst_layout,
2897                                    const VkImageSubresourceLayers *dst_subres,
2898                                    VkImageAspectFlagBits aspect,
2899                                    D3D12_BARRIER_LAYOUT *restore_src_layout,
2900                                    D3D12_BARRIER_LAYOUT *restore_dst_layout,
2901                                    bool post)
2902 {
2903    VkImageSubresourceRange src_range = {
2904       .aspectMask = aspect,
2905       .baseMipLevel = src_subres->mipLevel,
2906       .levelCount = 1,
2907       .baseArrayLayer = src_subres->baseArrayLayer,
2908       .layerCount = src_subres->layerCount,
2909    };
2910    VkImageSubresourceRange dst_range = {
2911       .aspectMask = aspect,
2912       .baseMipLevel = dst_subres->mipLevel,
2913       .levelCount = 1,
2914       .baseArrayLayer = dst_subres->baseArrayLayer,
2915       .layerCount = dst_subres->layerCount,
2916    };
2917 
2918    if (!post) {
2919       if (cmdbuf->enhanced_barriers) {
2920          D3D12_BARRIER_LAYOUT dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
2921             D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE : D3D12_BARRIER_LAYOUT_RENDER_TARGET;
2922          *restore_src_layout = dzn_cmd_buffer_require_layout(cmdbuf, src, src_layout,
2923                                                              D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
2924                                                              &src_range);
2925          *restore_dst_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst,
2926                                                              dst_layout,
2927                                                              dst_new_layout,
2928                                                              &dst_range);
2929       } else {
2930          VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
2931                                           VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
2932          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2933                                                             src_layout,
2934                                                             VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2935                                                             DZN_QUEUE_TRANSITION_FLUSH);
2936          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2937                                                             dst_layout,
2938                                                             dst_new_layout,
2939                                                             DZN_QUEUE_TRANSITION_FLUSH);
2940       }
2941    } else {
2942       if (cmdbuf->enhanced_barriers) {
2943          dzn_cmd_buffer_restore_layout(cmdbuf, src,
2944                                        D3D12_BARRIER_SYNC_PIXEL_SHADING, D3D12_BARRIER_ACCESS_SHADER_RESOURCE,
2945                                        D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, *restore_src_layout,
2946                                        &src_range);
2947          if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
2948             dzn_cmd_buffer_restore_layout(cmdbuf, dst,
2949                                           D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
2950                                           D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, *restore_dst_layout,
2951                                           &dst_range);
2952          } else {
2953             dzn_cmd_buffer_restore_layout(cmdbuf, dst,
2954                                           D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
2955                                           D3D12_BARRIER_LAYOUT_RENDER_TARGET, *restore_dst_layout,
2956                                           &dst_range);
2957          }
2958       } else {
2959          VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
2960                                           VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
2961          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2962                                                             VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2963                                                             src_layout,
2964                                                             DZN_QUEUE_TRANSITION_FLUSH);
2965          dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2966                                                             dst_new_layout,
2967                                                             dst_layout,
2968                                                             DZN_QUEUE_TRANSITION_FLUSH);
2969       }
2970    }
2971 }
2972 
/* Emit draws implementing one vkCmdBlitImage2 region, iterating over the
 * aspects in srcSubresource and over the destination slices/layers. Consumes
 * one slot from the SRV heap per aspect (via heap_slot), and optionally binds
 * a sampler from sampler_heap when static samplers aren't supported.
 */
static void
dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
                           const VkBlitImageInfo2 *info,
                           struct dzn_descriptor_heap *heap,
                           uint32_t *heap_slot,
                           struct dzn_descriptor_heap *sampler_heap,
                           uint32_t sampler_heap_slot,
                           uint32_t r)
{
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   const VkImageBlit2 *region = &info->pRegions[r];
   bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
   bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
   const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
   /* 0xf = "all stencil bits at once"; 0 starts the bit-by-bit fallback. */
   uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
   uint32_t stencil_bit_root_param_slot = 2;
   assert(device->support_static_samplers == (sampler_heap == NULL));

   dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
      D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
      D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
      dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
                                         src, info->srcImageLayout, &region->srcSubresource,
                                         dst, info->dstImageLayout, &region->dstSubresource,
                                         aspect, &restore_src_layout, &restore_dst_layout, false);
      dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
                                           aspect, &region->srcSubresource,
                                           heap, (*heap_slot)++);
      dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
                                        src, &region->srcSubresource, region->srcOffsets,
                                        dst, &region->dstSubresource, region->dstOffsets,
                                        src->vk.samples == 1);

      /* Offsets may be reversed for mirrored blits: take absolute depths. */
      uint32_t dst_depth =
         region->dstOffsets[1].z > region->dstOffsets[0].z ?
         region->dstOffsets[1].z - region->dstOffsets[0].z :
         region->dstOffsets[0].z - region->dstOffsets[1].z;
      uint32_t src_depth =
         region->srcOffsets[1].z > region->srcOffsets[0].z ?
         region->srcOffsets[1].z - region->srcOffsets[0].z :
         region->srcOffsets[0].z - region->srcOffsets[1].z;

      uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
      uint32_t dst_level = region->dstSubresource.mipLevel;

      /* Source Z stepping: for 3D sources, sample at slice centers (+0.5),
       * stepping by the src/dst depth ratio; sign follows the blit direction.
       */
      float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
      if (region->srcOffsets[0].z > region->srcOffsets[1].z)
         src_slice_step = -src_slice_step;
      float src_z_coord =
         src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
      uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
      uint32_t dst_z_coord =
         dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
      if (region->dstOffsets[0].z > region->dstOffsets[1].z)
         dst_z_coord--;

      /* Walk destination slices forward or backward (unsigned wraparound on
       * -1 is intentional: addition still steps by -1 modulo 2^32).
       */
      uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
                                1 : -1;

      /* Normalize the src coordinates/step */
      if (src_is_3d) {
         src_z_coord /= src->vk.extent.depth;
         src_slice_step /= src->vk.extent.depth;
      }

      for (uint32_t slice = 0; slice < slice_count; slice++) {
         dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets);
         /* Source Z goes in the 5th root constant (offset 16). */
         ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
         if (!device->support_static_samplers) {
            /* Dynamic sampler table occupies slot 2, pushing the stencil-bit
             * constant to slot 3.
             */
            ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
            stencil_bit_root_param_slot++;
         }
         if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
            /* No PS-specified stencil ref: emulate by drawing once per bit,
             * each draw writing one bit of the stencil value.
             */
            cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
            ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
            for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
               dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
               ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
               ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
            }
         } else {
            ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
         }
         src_z_coord += src_slice_step;
         dst_z_coord += dst_slice_step;
      }

      /* Restore the app-visible layouts. */
      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
                                         src, info->srcImageLayout, &region->srcSubresource,
                                         dst, info->dstImageLayout, &region->dstSubresource,
                                         aspect, &restore_src_layout, &restore_dst_layout, true);
   }
}
3071 
3072 static enum dzn_blit_resolve_mode
get_blit_resolve_mode(VkResolveModeFlagBits mode)3073 get_blit_resolve_mode(VkResolveModeFlagBits mode)
3074 {
3075    switch (mode) {
3076    case VK_RESOLVE_MODE_AVERAGE_BIT: return dzn_blit_resolve_average;
3077    case VK_RESOLVE_MODE_MIN_BIT: return dzn_blit_resolve_min;
3078    case VK_RESOLVE_MODE_MAX_BIT: return dzn_blit_resolve_max;
3079    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return dzn_blit_resolve_sample_zero;
3080    default: unreachable("Unexpected resolve mode");
3081    }
3082 }
3083 
3084 static void
dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer * cmdbuf,const VkResolveImageInfo2 * info,VkResolveModeFlags mode,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,struct dzn_descriptor_heap * sampler_heap,uint32_t sampler_heap_slot,uint32_t r)3085 dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
3086                               const VkResolveImageInfo2 *info,
3087                               VkResolveModeFlags mode,
3088                               struct dzn_descriptor_heap *heap,
3089                               uint32_t *heap_slot,
3090                               struct dzn_descriptor_heap *sampler_heap,
3091                               uint32_t sampler_heap_slot,
3092                               uint32_t r)
3093 {
3094    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3095    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3096 
3097    const VkImageResolve2 *region = &info->pRegions[r];
3098 
3099    const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3100    const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3101    bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
3102    uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
3103    uint32_t stencil_bit_root_param_slot = 2;
3104    assert(device->support_static_samplers == (sampler_heap == NULL));
3105    enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode);
3106 
3107    dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3108       D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
3109       D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
3110       dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3111       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3112                                          src, info->srcImageLayout, &region->srcSubresource,
3113                                          dst, info->dstImageLayout, &region->dstSubresource,
3114                                          aspect, &restore_src_layout, &restore_dst_layout, false);
3115       dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
3116                                            &region->srcSubresource,
3117                                            heap, (*heap_slot)++);
3118 
3119       VkOffset3D src_offset[2] = {
3120          {
3121             .x = region->srcOffset.x,
3122             .y = region->srcOffset.y,
3123          },
3124          {
3125             .x = (int32_t)(region->srcOffset.x + region->extent.width),
3126             .y = (int32_t)(region->srcOffset.y + region->extent.height),
3127          },
3128       };
3129       VkOffset3D dst_offset[2] = {
3130          {
3131             .x = region->dstOffset.x,
3132             .y = region->dstOffset.y,
3133          },
3134          {
3135             .x = (int32_t)(region->dstOffset.x + region->extent.width),
3136             .y = (int32_t)(region->dstOffset.y + region->extent.height),
3137          },
3138       };
3139 
3140       dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3141                                         src, &region->srcSubresource, src_offset,
3142                                         dst, &region->dstSubresource, dst_offset,
3143                                         false);
3144 
3145       uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
3146       for (uint32_t layer = 0; layer < layer_count; layer++) {
3147          float src_z_coord = layer;
3148 
3149          dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
3150                                               dst, aspect, region->dstSubresource.mipLevel,
3151                                               region->dstSubresource.baseArrayLayer + layer,
3152                                               dst_offset);
3153          ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3154          if (!device->support_static_samplers) {
3155             ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3156             stencil_bit_root_param_slot++;
3157          }
3158          if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3159             cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3160             ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist8, 0xff);
3161             for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3162                dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3163                ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3164                ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3165             }
3166          } else {
3167             ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3168          }
3169       }
3170 
3171       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3172                                          src, info->srcImageLayout, &region->srcSubresource,
3173                                          dst, info->dstImageLayout, &region->dstSubresource,
3174                                          aspect, &restore_src_layout, &restore_dst_layout, true);
3175    }
3176 }
3177 
/* Flush a pending pipeline bind for the given bind point to the D3D12
 * command list: root signature, descriptor heaps (bindless), PSO,
 * primitive topology and view-instancing mask as needed.
 */
static void
dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;

   if (!pipeline)
      return;

   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* Remember the currently-bound PSO so we can skip a redundant
    * SetPipelineState below. */
   ID3D12PipelineState *old_pipeline_state =
      cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;

   uint32_t view_instance_mask = 0;
   if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
      if (cmdbuf->state.bindpoint[bindpoint].root_sig != pipeline->root.sig) {
         cmdbuf->state.bindpoint[bindpoint].root_sig = pipeline->root.sig;
         /* Changing root signature always requires re-binding descriptor heaps */
         cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;

         if (device->bindless) {
            /* Note: The D3D12 spec for descriptor heap indexing requires that the descriptor heaps
             * are bound *before* the root signature. */
            bool bind_heaps = false;
            dzn_foreach_pool_type(type) {
               if (cmdbuf->state.heaps[type] != &device->device_heaps[type].heap) {
                  bind_heaps = true;
                  cmdbuf->state.heaps[type] = &device->device_heaps[type].heap;
               }
            }
            if (bind_heaps) {
               ID3D12DescriptorHeap *heaps[NUM_POOL_TYPES];
               dzn_foreach_pool_type(type)
                  heaps[type] = cmdbuf->state.heaps[type]->heap;
               ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, NUM_POOL_TYPES, heaps);
            }
         }

         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
         else
            ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
      }
      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
         struct dzn_graphics_pipeline *gfx =
            (struct dzn_graphics_pipeline *)pipeline;
         ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
         dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
         /* With native view instancing the mask selects the active views;
          * otherwise a single "view 0" is used. */
         if (gfx->multiview.native_view_instancing)
            view_instance_mask = gfx->multiview.view_mask;
         else
            view_instance_mask = 1;

         if (gfx->zsa.dynamic_depth_bias && gfx->use_gs_for_polygon_mode_point)
            cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
      }
   }

   ID3D12PipelineState *new_pipeline_state = pipeline->state;

   if (old_pipeline_state != new_pipeline_state) {
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
      cmdbuf->state.pipeline = pipeline;
   }

   /* Deferring this until after the pipeline has been set due to an NVIDIA driver bug
    * when view instancing mask is set with no pipeline bound. */
   if (view_instance_mask)
      ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, view_instance_mask);
}
3247 
/* Flush dirty descriptor state for one bind point.
 *
 * Not-bindless path: allocate fresh slots in the command buffer's shader
 * visible CBV_SRV_UAV/sampler heap pools, copy each bound set's descriptors
 * (and re-written dynamic-buffer descriptors) into them, then point the root
 * descriptor tables at the new offsets.
 *
 * Bindless path: sets live in GPU-visible buffers; bind the device-global
 * heaps and set root SRVs pointing at each set's buffer, plus an uploaded
 * array of dynamic-buffer descriptor entries when the pipeline uses them.
 */
static void
dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_descriptor_state *desc_state =
      &cmdbuf->state.bindpoint[bindpoint].desc_state;
   struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
   };
   uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
   bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
   const struct dzn_pipeline *pipeline =
      cmdbuf->state.bindpoint[bindpoint].pipeline;

   /* The set of dirty bits that are cleared by running this function. Notably,
    * for bindless, descriptor sets that are bound but unused by the currently
    * set pipeline are not processed, meaning their dirty bits should persist
    * until such a point as a pipeline does use them. For not-bindless,
    * all sets are processed. */
   uint32_t dirty_bits_bindless =
      (pipeline->dynamic_buffer_count ? DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS : 0) |
      (((DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << pipeline->set_count) - 1) & DZN_CMD_BINDPOINT_DIRTY_DESC_SETS);
   uint32_t dirty_bits = (device->bindless ? dirty_bits_bindless : DZN_CMD_BINDPOINT_DIRTY_DESC_SETS | DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS);
   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bits))
      return;

   dzn_foreach_pool_type (type) {
      if (device->bindless) {
         /* Bindless always binds the device-global heaps. */
         new_heaps[type] = &device->device_heaps[type].heap;
      } else {
         uint32_t desc_count = pipeline->desc_count[type];
         if (!desc_count)
            continue;

         struct dzn_descriptor_heap_pool *pool =
            type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
            &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
         struct dzn_descriptor_heap *dst_heap = NULL;
         uint32_t dst_heap_offset = 0;

         dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
                                              &dst_heap, &dst_heap_offset);
         new_heap_offsets[type] = dst_heap_offset;
         update_root_desc_table[type] = true;

         /* Copy every bound set's descriptors into the freshly-allocated
          * contiguous range, at the per-set offsets the pipeline layout
          * dictates. */
         for (uint32_t s = 0; s < MAX_SETS; s++) {
            const struct dzn_descriptor_set *set = desc_state->sets[s].set;
            if (!set) continue;

            uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
            uint32_t set_desc_count = MIN2(pipeline->sets[s].range_desc_count[type], set->heap_sizes[type]);
            if (set_desc_count) {
               dzn_descriptor_heap_copy(device, dst_heap, dst_heap_offset + set_heap_offset,
                                        &set->pool->heaps[type], set->heap_offsets[type],
                                        set_desc_count, type);
            }

            if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
               /* Dynamic buffers can't be copied as-is: their descriptor must
                * be re-written with the current dynamic offset applied. */
               uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
               for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
                  struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
                  if (!bdesc.buffer)
                     continue;
                  bdesc.offset += desc_state->sets[s].dynamic_offsets[o];

                  bool primary_is_writable = bdesc.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
                  uint32_t desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary;
                  dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
                                                        dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                        primary_is_writable, &bdesc);

                  /* A storage buffer may also need a read-only alias at the
                   * "alt" slot. */
                  if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt != ~0) {
                     assert(primary_is_writable);
                     desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt;
                     dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
                                                           dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                           false, &bdesc);
                  }
               }
            }
         }

         new_heaps[type] = dst_heap;
      }
   }

   /* Re-issue SetDescriptorHeaps only when one of the heaps actually changed. */
   if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
       new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
      ID3D12DescriptorHeap *desc_heaps[2];
      uint32_t num_desc_heaps = 0;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);

      for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
         cmdbuf->state.heaps[h] = new_heaps[h];
   }

   if (!device->bindless) {
      /* Point the root descriptor tables at the newly-populated ranges. */
      for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
         D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];

         if (!update_root_desc_table[type])
            continue;

         D3D12_GPU_DESCRIPTOR_HANDLE handle =
            dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);

         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
         else
            ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
      }
   }

   if (device->bindless) {
      /* Each dirty set becomes a root SRV pointing at its bindless buffer. */
      for (uint32_t s = 0; s < pipeline->set_count; ++s) {
         const struct dzn_descriptor_set *set = desc_state->sets[s].set;
         if (!set || !set->pool->bindless.buf)
            continue;

         uint32_t dirty_bit = DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << s;
         if (cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bit) {
            uint64_t gpuva = set->pool->bindless.gpuva + (set->heap_offsets[0] * sizeof(struct dxil_spirv_bindless_entry));
            if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
               ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
            else
               ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
         }
      }
      if (pipeline->dynamic_buffer_count &&
          (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS)) {
         /* Upload a fresh array of bindless entries with the current dynamic
          * offsets folded in, then bind it as a root SRV. */
         ID3D12Resource *dynamic_buffer_buf = NULL;
         uint64_t dynamic_buffer_buf_offset;
         VkResult result =
            dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(struct dxil_spirv_bindless_entry) * pipeline->dynamic_buffer_count,
                                              DZN_INTERNAL_BUF_UPLOAD,
                                              D3D12_RESOURCE_STATE_GENERIC_READ,
                                              D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT,
                                              &dynamic_buffer_buf,
                                              &dynamic_buffer_buf_offset);
         if (result != VK_SUCCESS)
            return;

         uint64_t gpuva = ID3D12Resource_GetGPUVirtualAddress(dynamic_buffer_buf) + dynamic_buffer_buf_offset;
         struct dxil_spirv_bindless_entry *map;
         ID3D12Resource_Map(dynamic_buffer_buf, 0, NULL, (void **)&map);
         map += (dynamic_buffer_buf_offset / sizeof(*map));

         for (uint32_t s = 0; s < MAX_SETS; ++s) {
            const struct dzn_descriptor_set *set = desc_state->sets[s].set;
            if (!set)
               continue;

            uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
            for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
               const struct dzn_buffer_desc *bdesc = &set->dynamic_buffers[o];
               volatile struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary];
               struct dzn_buffer_desc bdesc_updated = *bdesc;
               bdesc_updated.offset += cmdbuf->state.bindpoint[bindpoint].desc_state.sets[s].dynamic_offsets[o];
               dzn_buffer_get_bindless_buffer_descriptor(device, &bdesc_updated, map_entry);
            }
         }

         ID3D12Resource_Unmap(dynamic_buffer_buf, 0, NULL);
         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist,
                                                                         pipeline->root.dynamic_buffer_bindless_param_idx,
                                                                         gpuva);
         else
            ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                        pipeline->root.dynamic_buffer_bindless_param_idx,
                                                                        gpuva);
      }
   }

   cmdbuf->state.bindpoint[bindpoint].dirty &= ~dirty_bits;
}
3429 
3430 static void
dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)3431 dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3432 {
3433    if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
3434       return;
3435 
3436    const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
3437    uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
3438 
3439    if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3440       ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3441                                                      sizeof(cmdbuf->state.sysvals.gfx) / 4,
3442                                                      &cmdbuf->state.sysvals.gfx, 0);
3443    } else {
3444       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3445                                                     sizeof(cmdbuf->state.sysvals.compute) / 4,
3446                                                     &cmdbuf->state.sysvals.compute, 0);
3447    }
3448 
3449    cmdbuf->state.bindpoint[bindpoint].dirty &= ~DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3450 }
3451 
3452 static void
dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer * cmdbuf)3453 dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
3454 {
3455    const struct dzn_graphics_pipeline *pipeline =
3456       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3457 
3458    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
3459        !pipeline->vp.count)
3460       return;
3461 
3462    ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
3463 }
3464 
3465 static void
dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer * cmdbuf)3466 dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
3467 {
3468    const struct dzn_graphics_pipeline *pipeline =
3469       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3470 
3471    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
3472       return;
3473 
3474    if (!pipeline->scissor.count) {
3475       /* Apply a scissor delimiting the render area. */
3476       ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
3477       return;
3478    }
3479 
3480    D3D12_RECT scissors[MAX_SCISSOR];
3481 
3482    memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
3483    for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
3484       scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
3485       scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
3486       scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
3487       scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
3488    }
3489 
3490    ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
3491 }
3492 
3493 static void
dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer * cmdbuf)3494 dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
3495 {
3496    unsigned start, end;
3497 
3498    BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
3499       ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, &cmdbuf->state.vb.views[start]);
3500 
3501    BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
3502 }
3503 
3504 static void
dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer * cmdbuf)3505 dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
3506 {
3507    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
3508       return;
3509 
3510    ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
3511 }
3512 
3513 static void
dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)3514 dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3515 {
3516    struct dzn_cmd_buffer_push_constant_state *state =
3517       bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
3518       &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
3519 
3520    uint32_t offset = state->offset / 4;
3521    uint32_t end = ALIGN(state->end, 4) / 4;
3522    uint32_t count = end - offset;
3523 
3524    if (!count)
3525       return;
3526 
3527    uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
3528    uint32_t *vals = state->values + offset;
3529 
3530    if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3531       ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3532    else
3533       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3534 
3535    state->offset = 0;
3536    state->end = 0;
3537 }
3538 
3539 static void
dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer * cmdbuf)3540 dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
3541 {
3542    struct dzn_physical_device *pdev =
3543       container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
3544    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
3545       const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
3546          cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3547       if (cmdbuf->cmdlist8 &&
3548           pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
3549          ID3D12GraphicsCommandList8_OMSetFrontAndBackStencilRef(cmdbuf->cmdlist8,
3550                                                                 cmdbuf->state.zsa.stencil_test.front.ref,
3551                                                                 cmdbuf->state.zsa.stencil_test.back.ref);
3552       } else {
3553          uint32_t ref =
3554             gfx->zsa.stencil_test.front.uses_ref ?
3555             cmdbuf->state.zsa.stencil_test.front.ref :
3556             cmdbuf->state.zsa.stencil_test.back.ref;
3557          ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
3558       }
3559    }
3560 }
3561 
3562 static void
dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer * cmdbuf)3563 dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
3564 {
3565    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
3566       ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
3567                                                   cmdbuf->state.blend.constants);
3568 }
3569 
3570 static void
dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer * cmdbuf)3571 dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
3572 {
3573    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
3574       ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
3575                                                   cmdbuf->state.zsa.depth_bounds.min,
3576                                                   cmdbuf->state.zsa.depth_bounds.max);
3577    }
3578 }
3579 
3580 static void
dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer * cmdbuf)3581 dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer *cmdbuf)
3582 {
3583    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BIAS) {
3584       assert(cmdbuf->cmdlist9);
3585       ID3D12GraphicsCommandList9_RSSetDepthBias(cmdbuf->cmdlist9,
3586                                                 cmdbuf->state.pipeline_variant.depth_bias.constant_factor,
3587                                                 cmdbuf->state.pipeline_variant.depth_bias.clamp,
3588                                                 cmdbuf->state.pipeline_variant.depth_bias.slope_factor);
3589    }
3590 }
3591 
3592 static VkResult
dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer * cmdbuf,uint32_t * vertex_count)3593 dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
3594 {
3595    uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
3596    uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
3597 
3598    *vertex_count = triangle_count * 3;
3599    if (!*vertex_count)
3600       return VK_SUCCESS;
3601 
3602    ID3D12Resource *index_buf;
3603    uint64_t index_offset;
3604    VkResult result =
3605       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
3606                                         DZN_INTERNAL_BUF_UPLOAD,
3607                                         D3D12_RESOURCE_STATE_GENERIC_READ,
3608                                         index_size,
3609                                         &index_buf,
3610                                         &index_offset);
3611    if (result != VK_SUCCESS)
3612       return result;
3613 
3614    void *cpu_ptr;
3615    ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);
3616    cpu_ptr = (uint8_t *)cpu_ptr + index_offset;
3617 
3618    /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
3619    if (index_size == 2) {
3620       uint16_t *indices = (uint16_t *)cpu_ptr;
3621       for (uint32_t t = 0; t < triangle_count; t++) {
3622          indices[t * 3] = t + 1;
3623          indices[(t * 3) + 1] = t + 2;
3624          indices[(t * 3) + 2] = 0;
3625       }
3626       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
3627    } else {
3628       uint32_t *indices = (uint32_t *)cpu_ptr;
3629       for (uint32_t t = 0; t < triangle_count; t++) {
3630          indices[t * 3] = t + 1;
3631          indices[(t * 3) + 1] = t + 2;
3632          indices[(t * 3) + 2] = 0;
3633       }
3634       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
3635    }
3636 
3637    cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
3638    cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf) + index_offset;
3639    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3640    return VK_SUCCESS;
3641 }
3642 
/* Lower a triangle-fan indexed draw to a triangle-list draw: a small compute
 * shader expands each fan triangle into three 32-bit indices written to a
 * freshly allocated internal buffer. On success, *index_count and
 * *first_index are rewritten for the lowered draw and the command-buffer
 * index-buffer state points at the new buffer (callers restore it as
 * needed).
 */
static VkResult
dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
                                          uint32_t *index_count,
                                          uint32_t *first_index)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* A fan of N indices yields N - 2 triangles (clamped to zero). */
   uint32_t triangle_count = MAX2(*index_count, 2) - 2;

   *index_count = triangle_count * 3;
   if (!*index_count)
      return VK_SUCCESS;

   /* New index is always 32bit to make the compute shader rewriting the
    * index simpler */
   ID3D12Resource *new_index_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        4,
                                        &new_index_buf,
                                        NULL);
   if (result != VK_SUCCESS)
      return result;

   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
      cmdbuf->state.ib.view.BufferLocation;

   /* This direct-draw path doesn't handle primitive restart (note the
    * `false` passed to dzn_index_type_from_dxgi_format() below); the
    * assert documents that precondition. */
   ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   ASSERTED bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;

   assert(!prim_restart);

   enum dzn_index_type index_type =
      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
   const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
      &device->triangle_fan[index_type];

   struct dzn_triangle_fan_rewrite_index_params params = {
      .first_index = *first_index,
   };

   /* We're about to bind our own compute pipeline/root signature, so
    * invalidate the cached compute bind-point state up front. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
   /* Root param 0: UAV receiving the rewritten 32-bit indices. */
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
   /* Root param 1: inline constants (first_index). */
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
                                                 &params, 0);
   /* Root param 2: SRV on the original index buffer. */
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
   /* One dispatch thread-group per output triangle. */
   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);

   /* Make the compute-shader writes visible to the index-input stage. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, new_index_buf,
                                    D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_INDEX_INPUT,
                                    D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDEX_BUFFER);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                               D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* We don't mess up with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   /* Point the IB state at the rewritten buffer and start indexing at 0. */
   cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
   cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   *first_index = 0;
   return VK_SUCCESS;
}
3725 
/* Flush all accumulated Vulkan-level graphics state to the D3D12 command
 * list before a draw. `indexed` selects whether the index-buffer view needs
 * to be (re)bound as well.
 */
static void
dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
{
   /* The index-buffer view only matters for indexed draws. */
   if (indexed)
      dzn_cmd_buffer_update_ibview(cmdbuf);

   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_viewports(cmdbuf);
   dzn_cmd_buffer_update_scissors(cmdbuf);
   dzn_cmd_buffer_update_vbviews(cmdbuf);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_zsa(cmdbuf);
   dzn_cmd_buffer_update_blend_constants(cmdbuf);
   dzn_cmd_buffer_update_depth_bounds(cmdbuf);
   dzn_cmd_buffer_update_depth_bias(cmdbuf);

   /* Reset the dirty states */
   /* NOTE(review): the HEAPS bit is deliberately preserved (masked in, not
    * cleared) — presumably heap rebinding is resolved elsewhere; confirm
    * before changing this mask. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
   cmdbuf->state.dirty = 0;
}
3748 
3749 static uint32_t
dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer * cmdbuf,bool indexed)3750 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
3751 {
3752    struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
3753       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3754 
3755    if (!pipeline->ia.triangle_fan)
3756       return 0;
3757 
3758    uint32_t max_triangles;
3759 
3760    if (indexed) {
3761       uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
3762       uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
3763 
3764       max_triangles = MAX2(max_indices, 2) - 2;
3765    } else {
3766       uint32_t max_vertex = 0;
3767       for (uint32_t i = 0; i < pipeline->vb.count; i++) {
3768          max_vertex =
3769             MAX2(max_vertex,
3770                  cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
3771       }
3772 
3773       max_triangles = MAX2(max_vertex, 2) - 2;
3774    }
3775 
3776    return max_triangles * 3;
3777 }
3778 
/* Emit an indirect (optionally indexed / counted) draw.
 *
 * D3D12's ExecuteIndirect() argument layout differs from Vulkan's indirect
 * draw structs, so a compute shader first rewrites the application-supplied
 * draw buffer into an internal "exec" buffer. When the bound pipeline uses
 * triangle fans, additional internal buffers are allocated and a second
 * compute pass rewrites the index data so the draw can run as a triangle
 * list. Errors during internal-buffer allocation abort the draw silently
 * (the command buffer error state is handled by the allocators).
 */
static void
dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
                             ID3D12Resource *draw_buf,
                             size_t draw_buf_offset,
                             ID3D12Resource *count_buf,
                             size_t count_buf_offset,
                             uint32_t max_draw_count,
                             uint32_t draw_buf_stride,
                             bool indexed)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   uint32_t min_draw_buf_stride =
      indexed ?
      sizeof(struct dzn_indirect_indexed_draw_params) :
      sizeof(struct dzn_indirect_draw_params);
   bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;

   /* A zero stride means tightly-packed draw parameters. */
   draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
   assert(draw_buf_stride >= min_draw_buf_stride);
   assert((draw_buf_stride & 3) == 0);

   /* Non-zero only when the pipeline draws triangle fans: worst-case size in
    * bytes of the per-draw rewritten index buffer. */
   uint32_t triangle_fan_index_buf_stride =
      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
      sizeof(uint32_t);
   uint32_t exec_buf_stride =
      triangle_fan_index_buf_stride > 0 ?
      sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
      sizeof(struct dzn_indirect_draw_exec_params);
   uint32_t triangle_fan_exec_buf_stride =
      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
   uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
   uint32_t exec_buf_draw_offset = 0;

   // We reserve the first slot for the draw_count value when indirect count is
   // involved.
   if (count_buf != NULL) {
      exec_buf_size += exec_buf_stride;
      exec_buf_draw_offset = exec_buf_stride;
   }

   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
      ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset;
   ID3D12Resource *triangle_fan_index_buf = NULL;
   ID3D12Resource *triangle_fan_exec_buf = NULL;

   /* Triangle-fan lowering needs a scratch index buffer plus an indirect
    * argument buffer for the index-rewrite dispatches. */
   if (triangle_fan_index_buf_stride) {
      result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
                                           max_draw_count * triangle_fan_index_buf_stride,
                                           DZN_INTERNAL_BUF_DEFAULT,
                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                           0,
                                           &triangle_fan_index_buf, NULL);
      if (result != VK_SUCCESS)
         return;

      result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
                                           max_draw_count * triangle_fan_exec_buf_stride,
                                           DZN_INTERNAL_BUF_DEFAULT,
                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                           0,
                                           &triangle_fan_exec_buf, NULL);
      if (result != VK_SUCCESS)
         return;
   }

   /* Largest params variant; only the leading params_size bytes (chosen
    * below) are pushed as root constants. */
   struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
      .draw_buf_stride = draw_buf_stride,
      .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
      .triangle_fan_index_buf_start =
         triangle_fan_index_buf ?
         ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
      .exec_buf_start =
         prim_restart ?
         ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
   };
   uint32_t params_size;
   if (triangle_fan_index_buf_stride > 0 && prim_restart)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
   else if (triangle_fan_index_buf_stride > 0)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
   else
      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);

   /* Pick the rewrite-shader variant matching this draw's combination of
    * indexed / indirect-count / triangle-fan / primitive-restart. */
   enum dzn_indirect_draw_type draw_type;

   if (indexed && triangle_fan_index_buf_stride > 0) {
      if (prim_restart && count_buf)
         draw_type =  DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
      else if (prim_restart && !count_buf)
         draw_type =  DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART;
      else if (!prim_restart && count_buf)
         draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
      else
         draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
   } else if (!indexed && triangle_fan_index_buf_stride > 0) {
      draw_type = count_buf ?
                  DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN :
                  DZN_INDIRECT_DRAW_TRIANGLE_FAN;
   } else if (indexed) {
      draw_type = count_buf ?
                  DZN_INDIRECT_INDEXED_DRAW_COUNT :
                  DZN_INDIRECT_INDEXED_DRAW;
   } else {
      draw_type = count_buf ? DZN_INDIRECT_DRAW_COUNT : DZN_INDIRECT_DRAW;
   }

   struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
   uint32_t root_param_idx = 0;

   /* First compute pass: translate the app draw buffer into exec_buf
    * (D3D12 ExecuteIndirect layout), one thread-group per potential draw. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
                                                           params_size / 4, (const void *)&params, 0);
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
                                                               draw_buf_gpu);
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
                                                                ID3D12Resource_GetGPUVirtualAddress(exec_buf));
   if (count_buf) {
      ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                  root_param_idx++,
                                                                  ID3D12Resource_GetGPUVirtualAddress(count_buf) +
                                                                  count_buf_offset);
   }

   if (triangle_fan_exec_buf) {
      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
                                                                   root_param_idx++,
                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
   }

   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);

   /* Saved copy of the app IB view, restored at the end if the triangle-fan
    * path replaced it (SizeInBytes != 0 is the "was saved" flag). */
   D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
   D3D12_BUFFER_BARRIER buf_barriers[2];
   D3D12_BARRIER_GROUP enhanced_barriers = {
      .NumBarriers = 0,
      .Type = D3D12_BARRIER_TYPE_BUFFER,
      .pBufferBarriers = buf_barriers
   };

   /* Second compute pass (triangle fans only): rewrite the indices of each
    * draw into triangle_fan_index_buf, driven indirectly so the per-draw
    * triangle count computed by pass one is honored. */
   if (triangle_fan_exec_buf) {
      enum dzn_index_type index_type =
         indexed ?
         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) :
         DZN_NO_INDEX;
      struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
         &device->triangle_fan[index_type];

      struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };

      assert(rewrite_index->root_sig);
      assert(rewrite_index->pipeline_state);
      assert(rewrite_index->cmd_sig);

      /* Pass one wrote the rewrite-dispatch arguments; make them visible to
       * the indirect-argument stage. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, triangle_fan_exec_buf,
                                       D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                       D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
                                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                                  D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }

      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
      ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
      root_param_idx = 0;
      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
                                                              sizeof(rewrite_index_params) / 4,
                                                              (const void *)&rewrite_index_params, 0);

      if (indexed) {
         ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                     root_param_idx++,
                                                                     cmdbuf->state.ib.view.BufferLocation);
      }

      /* exec_buf slot 0 holds the actual draw count when an indirect-count
       * buffer is involved. */
      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
                                                 max_draw_count, triangle_fan_exec_buf, 0,
                                                 count_buf ? exec_buf : NULL, 0);

      /* Rewritten indices must be visible to the index-input stage. */
      if (cmdbuf->enhanced_barriers) {
         buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
            .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
            .SyncAfter = D3D12_BARRIER_SYNC_INDEX_INPUT,
            .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
            .AccessAfter = D3D12_BARRIER_ACCESS_INDEX_BUFFER,
            .pResource = triangle_fan_index_buf,
            .Offset = 0, .Size = UINT64_MAX
         };
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
                                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                                  D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }

      /* After our triangle-fan lowering the draw is indexed */
      indexed = true;
      ib_view = cmdbuf->state.ib.view;
      cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
      cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }

   /* Flush the exec-buffer writes to the indirect-argument stage (batched
    * with the index-buffer barrier above on the enhanced-barrier path). */
   if (cmdbuf->enhanced_barriers) {
      buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
         .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
         .SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
         .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
         .AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT,
         .pResource = exec_buf,
         .Offset = 0, .Size = UINT64_MAX
      };
      ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &enhanced_barriers);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* We don't mess up with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   enum dzn_indirect_draw_cmd_sig_type cmd_sig_type =
      triangle_fan_index_buf_stride > 0 ?
      DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG :
      indexed ?
      DZN_INDIRECT_INDEXED_DRAW_CMD_SIG :
      DZN_INDIRECT_DRAW_CMD_SIG;
   ID3D12CommandSignature *cmdsig =
      dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      return;
   }

   /* The exec buffer carries per-draw first_vertex/base_instance values, so
    * the sysval copies are zeroed for the actual draws. */
   cmdbuf->state.sysvals.gfx.first_vertex = 0;
   cmdbuf->state.sysvals.gfx.base_instance = 0;
   cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;

   /* Without native view instancing, replay the draw once per active view,
    * patching the view_index sysval each time. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;
   u_foreach_bit(view, view_mask) {
      cmdbuf->state.sysvals.gfx.view_index = view;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

      dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);

      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
                                                 max_draw_count,
                                                 exec_buf, exec_buf_draw_offset,
                                                 count_buf ? exec_buf : NULL, 0);
   }

   /* Restore the old IB view if we modified it during the triangle fan lowering */
   if (ib_view.SizeInBytes) {
      cmdbuf->state.ib.view = ib_view;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }
}
4073 
/* Flush all accumulated Vulkan-level compute state to the D3D12 command
 * list before a dispatch.
 */
static void
dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
{
   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);

   /* Reset the dirty states */
   /* Everything except the HEAPS bit is cleared, matching the graphics
    * bind-point handling in dzn_cmd_buffer_prepare_draw(). */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
}
4085 
4086 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,const VkCopyBufferInfo2 * info)4087 dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
4088                    const VkCopyBufferInfo2 *info)
4089 {
4090    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4091    VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
4092    VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
4093 
4094    for (int i = 0; i < info->regionCount; i++) {
4095       const VkBufferCopy2 *region = info->pRegions + i;
4096 
4097       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
4098                                         src_buffer->res, region->srcOffset,
4099                                         region->size);
4100    }
4101 }
4102 
4103 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,const VkCopyBufferToImageInfo2 * info)4104 dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
4105                           const VkCopyBufferToImageInfo2 *info)
4106 {
4107    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4108 
4109    for (int i = 0; i < info->regionCount; i++) {
4110       const VkBufferImageCopy2 *region = info->pRegions + i;
4111 
4112       dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4113          for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4114             dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
4115       }
4116    }
4117 }
4118 
4119 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,const VkCopyImageToBufferInfo2 * info)4120 dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
4121                           const VkCopyImageToBufferInfo2 *info)
4122 {
4123    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4124 
4125    for (int i = 0; i < info->regionCount; i++) {
4126       const VkBufferImageCopy2 *region = info->pRegions + i;
4127 
4128       dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4129          for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4130             dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
4131       }
4132    }
4133 }
4134 
4135 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * info)4136 dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
4137                   const VkCopyImageInfo2 *info)
4138 {
4139    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4140    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4141    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
4142    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
4143    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
4144 
4145    assert(src->vk.samples == dst->vk.samples);
4146 
4147    bool requires_temp_res = false;
4148 
4149    for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
4150       const VkImageCopy2 *region = &info->pRegions[i];
4151 
4152       dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4153          assert(aspect & region->dstSubresource.aspectMask);
4154 
4155          if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
4156                                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
4157              src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
4158              dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
4159             requires_temp_res = true;
4160             break;
4161          }
4162       }
4163    }
4164 
4165    bool use_blit = false;
4166    if (src->vk.samples > 1) {
4167       use_blit = requires_temp_res;
4168 
4169       for (int i = 0; i < info->regionCount; i++) {
4170          const VkImageCopy2 *region = info->pRegions + i;
4171          if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
4172              region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
4173              region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
4174              region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
4175              region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
4176              region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
4177             use_blit = true;
4178       }
4179    }
4180 
4181    if (use_blit) {
4182       /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]() is
4183        * is issued on a transfer queue, but we don't have any better option
4184        * right now...
4185        */
4186       STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
4187 
4188       VkBlitImageInfo2 blit_info = {
4189          .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
4190          .srcImage = info->srcImage,
4191          .srcImageLayout = info->srcImageLayout,
4192          .dstImage = info->dstImage,
4193          .dstImageLayout = info->dstImageLayout,
4194          .regionCount = info->regionCount,
4195          .pRegions = blit_regions,
4196          .filter = VK_FILTER_NEAREST,
4197       };
4198 
4199       for (uint32_t r = 0; r < info->regionCount; r++) {
4200          blit_regions[r] = (VkImageBlit2) {
4201             .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4202             .srcSubresource = info->pRegions[r].srcSubresource,
4203             .srcOffsets = {
4204                 info->pRegions[r].srcOffset,
4205                 info->pRegions[r].srcOffset,
4206             },
4207             .dstSubresource = info->pRegions[r].dstSubresource,
4208             .dstOffsets = {
4209                 info->pRegions[r].dstOffset,
4210                 info->pRegions[r].dstOffset,
4211             },
4212          };
4213 
4214          blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
4215          blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
4216          blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
4217          blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
4218          blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
4219          blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
4220       }
4221 
4222       dzn_CmdBlitImage2(commandBuffer, &blit_info);
4223 
4224       STACK_ARRAY_FINISH(blit_regions);
4225       return;
4226    }
4227 
4228    D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
4229    D3D12_RESOURCE_DESC tmp_desc = {
4230       .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
4231       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
4232       .DepthOrArraySize = 1,
4233       .MipLevels = 1,
4234       .Format = src->desc.Format,
4235       .SampleDesc = { .Count = 1, .Quality = 0 },
4236       .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
4237       .Flags = D3D12_RESOURCE_FLAG_NONE,
4238    };
4239 
4240    if (requires_temp_res) {
4241       ID3D12Device4 *dev = device->dev;
4242       VkImageAspectFlags aspect = 0;
4243       uint64_t max_size = 0;
4244 
4245       if (vk_format_has_depth(src->vk.format))
4246          aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4247       else if (vk_format_has_stencil(src->vk.format))
4248          aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4249       else
4250          aspect = VK_IMAGE_ASPECT_COLOR_BIT;
4251 
4252       for (uint32_t i = 0; i < info->regionCount; i++) {
4253          const VkImageCopy2 *region = &info->pRegions[i];
4254          uint64_t region_size = 0;
4255 
4256          tmp_desc.Format =
4257             dzn_image_get_dxgi_format(pdev, src->vk.format,
4258                                       VK_IMAGE_USAGE_TRANSFER_DST_BIT,
4259                                       aspect);
4260          tmp_desc.Width = region->extent.width;
4261          tmp_desc.Height = region->extent.height;
4262 
4263          ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
4264                                              0, 1, 0,
4265                                              NULL, NULL, NULL,
4266                                              &region_size);
4267          max_size = MAX2(max_size, region_size * region->extent.depth);
4268       }
4269 
4270       VkResult result =
4271          dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
4272                                            DZN_INTERNAL_BUF_DEFAULT,
4273                                            D3D12_RESOURCE_STATE_COPY_DEST,
4274                                            0,
4275                                            &tmp_loc.pResource, NULL);
4276       if (result != VK_SUCCESS)
4277          return;
4278 
4279       tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
4280    }
4281 
4282    for (int i = 0; i < info->regionCount; i++) {
4283       const VkImageCopy2 *region = &info->pRegions[i];
4284 
4285       dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4286          for (uint32_t l = 0; l < MAX2(region->srcSubresource.layerCount, region->dstSubresource.layerCount); l++)
4287             dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
4288       }
4289    }
4290 }
4291 
4292 static VkResult
dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer * cmdbuf,uint32_t num_view_slots,D3D12_FILTER sampler_filter,struct dzn_descriptor_heap ** view_heap,uint32_t * view_heap_slot,struct dzn_descriptor_heap ** sampler_heap,uint32_t * sampler_heap_slot)4293 dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer *cmdbuf,
4294                                    uint32_t num_view_slots, D3D12_FILTER sampler_filter,
4295                                    struct dzn_descriptor_heap **view_heap, uint32_t *view_heap_slot,
4296                                    struct dzn_descriptor_heap **sampler_heap, uint32_t *sampler_heap_slot)
4297 {
4298    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4299 
4300    VkResult result =
4301       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
4302                                            num_view_slots, view_heap, view_heap_slot);
4303 
4304    if (result != VK_SUCCESS) {
4305       vk_command_buffer_set_error(&cmdbuf->vk, result);
4306       return result;
4307    }
4308 
4309    if (!device->support_static_samplers) {
4310       result =
4311          dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->sampler_pool, device,
4312                                               1, sampler_heap, sampler_heap_slot);
4313 
4314       if (result != VK_SUCCESS) {
4315          vk_command_buffer_set_error(&cmdbuf->vk, result);
4316          return result;
4317       }
4318 
4319       D3D12_SAMPLER_DESC sampler_desc = {
4320          .Filter = sampler_filter,
4321          .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4322          .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4323          .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4324          .MipLODBias = 0,
4325          .MaxAnisotropy = 0,
4326          .MinLOD = 0,
4327          .MaxLOD = D3D12_FLOAT32_MAX,
4328       };
4329       ID3D12Device4_CreateSampler(device->dev, &sampler_desc,
4330          dzn_descriptor_heap_get_cpu_handle(*sampler_heap, *sampler_heap_slot));
4331    }
4332 
4333    if (*view_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
4334        (*sampler_heap && *sampler_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])) {
4335       ID3D12DescriptorHeap * const heaps[] = { (*view_heap)->heap, *sampler_heap ? (*sampler_heap)->heap : NULL };
4336       cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = *view_heap;
4337       cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = *sampler_heap;
4338       ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, *sampler_heap ? 2 : 1, heaps);
4339    }
4340 
4341    return VK_SUCCESS;
4342 }
4343 
4344 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,const VkBlitImageInfo2 * info)4345 dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
4346                   const VkBlitImageInfo2 *info)
4347 {
4348    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4349 
4350    if (info->regionCount == 0)
4351       return;
4352 
4353    uint32_t desc_count = 0;
4354    for (uint32_t r = 0; r < info->regionCount; r++)
4355       desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4356 
4357    struct dzn_descriptor_heap *heap;
4358    uint32_t heap_slot;
4359    struct dzn_descriptor_heap *sampler_heap = NULL;
4360    uint32_t sampler_heap_slot = 0;
4361    VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4362                                                         info->filter == VK_FILTER_LINEAR ?
4363                                                          D3D12_FILTER_MIN_MAG_MIP_LINEAR :
4364                                                          D3D12_FILTER_MIN_MAG_MIP_POINT,
4365                                                         &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4366 
4367    if (result != VK_SUCCESS)
4368       return;
4369 
4370    ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4371 
4372    for (uint32_t r = 0; r < info->regionCount; r++)
4373       dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4374 
4375    cmdbuf->state.pipeline = NULL;
4376    cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4377    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4378       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4379          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4380    }
4381 }
4382 
4383 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,const VkResolveImageInfo2 * info)4384 dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
4385                      const VkResolveImageInfo2 *info)
4386 {
4387    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4388 
4389    if (info->regionCount == 0)
4390       return;
4391 
4392    uint32_t desc_count = 0;
4393    for (uint32_t r = 0; r < info->regionCount; r++)
4394       desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4395 
4396    struct dzn_descriptor_heap *heap;
4397    uint32_t heap_slot;
4398    struct dzn_descriptor_heap *sampler_heap = NULL;
4399    uint32_t sampler_heap_slot = 0;
4400    VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4401                                                         D3D12_FILTER_MIN_MAG_MIP_POINT,
4402                                                         &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4403    if (result != VK_SUCCESS)
4404       return;
4405 
4406    ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4407 
4408    for (uint32_t r = 0; r < info->regionCount; r++)
4409       dzn_cmd_buffer_resolve_region(cmdbuf, info, VK_RESOLVE_MODE_AVERAGE_BIT, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4410 
4411    cmdbuf->state.pipeline = NULL;
4412    cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4413    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4414       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4415          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4416    }
4417 }
4418 
4419 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)4420 dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
4421                        VkImage image,
4422                        VkImageLayout imageLayout,
4423                        const VkClearColorValue *pColor,
4424                        uint32_t rangeCount,
4425                        const VkImageSubresourceRange *pRanges)
4426 {
4427    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4428    VK_FROM_HANDLE(dzn_image, img, image);
4429 
4430    dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
4431 }
4432 
4433 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)4434 dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
4435                               VkImage image,
4436                               VkImageLayout imageLayout,
4437                               const VkClearDepthStencilValue *pDepthStencil,
4438                               uint32_t rangeCount,
4439                               const VkImageSubresourceRange *pRanges)
4440 {
4441    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4442    VK_FROM_HANDLE(dzn_image, img, image);
4443 
4444    dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
4445 }
4446 
4447 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchBase(VkCommandBuffer commandBuffer,uint32_t baseGroupX,uint32_t baseGroupY,uint32_t baseGroupZ,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)4448 dzn_CmdDispatchBase(VkCommandBuffer commandBuffer,
4449                     uint32_t baseGroupX,
4450                     uint32_t baseGroupY,
4451                     uint32_t baseGroupZ,
4452                     uint32_t groupCountX,
4453                     uint32_t groupCountY,
4454                     uint32_t groupCountZ)
4455 {
4456    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4457 
4458    cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
4459    cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
4460    cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
4461    cmdbuf->state.sysvals.compute.base_group_x = baseGroupX;
4462    cmdbuf->state.sysvals.compute.base_group_y = baseGroupY;
4463    cmdbuf->state.sysvals.compute.base_group_z = baseGroupZ;
4464    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
4465       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
4466 
4467    dzn_cmd_buffer_prepare_dispatch(cmdbuf);
4468    ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
4469 }
4470 
4471 VKAPI_ATTR void VKAPI_CALL
dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)4472 dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
4473                   VkBuffer dstBuffer,
4474                   VkDeviceSize dstOffset,
4475                   VkDeviceSize size,
4476                   uint32_t data)
4477 {
4478    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4479    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4480 
4481    if (size == VK_WHOLE_SIZE)
4482       size = buf->size - dstOffset;
4483 
4484    size &= ~3ULL;
4485 
4486    ID3D12Resource *src_res;
4487    uint64_t src_offset;
4488    VkResult result =
4489       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4490                                         DZN_INTERNAL_BUF_UPLOAD,
4491                                         D3D12_RESOURCE_STATE_GENERIC_READ,
4492                                         4,
4493                                         &src_res,
4494                                         &src_offset);
4495    if (result != VK_SUCCESS)
4496       return;
4497 
4498    uint32_t *cpu_ptr;
4499    ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
4500    cpu_ptr += src_offset / sizeof(uint32_t);
4501    for (uint32_t i = 0; i < size / 4; i++)
4502       cpu_ptr[i] = data;
4503 
4504    ID3D12Resource_Unmap(src_res, 0, NULL);
4505 
4506    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4507 }
4508 
4509 VKAPI_ATTR void VKAPI_CALL
dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,const void * data)4510 dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
4511                     VkBuffer dstBuffer,
4512                     VkDeviceSize dstOffset,
4513                     VkDeviceSize size,
4514                     const void *data)
4515 {
4516    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4517    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4518 
4519    if (size == VK_WHOLE_SIZE)
4520       size = buf->size - dstOffset;
4521 
4522    /*
4523     * The spec says:
4524     *   4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
4525     *   buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
4526     *   is not a multiple of 4, then the nearest smaller multiple is used."
4527     */
4528    size &= ~3ULL;
4529 
4530    ID3D12Resource *src_res;
4531    uint64_t src_offset;
4532    VkResult result =
4533       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4534                                         DZN_INTERNAL_BUF_UPLOAD,
4535                                         D3D12_RESOURCE_STATE_GENERIC_READ,
4536                                         4,
4537                                         &src_res, &src_offset);
4538    if (result != VK_SUCCESS)
4539       return;
4540 
4541    void *cpu_ptr;
4542    ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
4543    memcpy((uint8_t *)cpu_ptr + src_offset, data, size),
4544    ID3D12Resource_Unmap(src_res, 0, NULL);
4545 
4546    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4547 }
4548 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
                        uint32_t attachmentCount,
                        const VkClearAttachment *pAttachments,
                        uint32_t rectCount,
                        const VkClearRect *pRects)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   for (unsigned i = 0; i < attachmentCount; i++) {
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
      struct dzn_image_view *view = NULL;

      /* Resolve which bound attachment (color, or depth/stencil) this clear
       * targets, along with the layout it is currently in.
       */
      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
         view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
         layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
      } else {
         if (cmdbuf->state.render.attachments.depth.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
            view = cmdbuf->state.render.attachments.depth.iview;
            layout = cmdbuf->state.render.attachments.depth.layout;
         }

         if (cmdbuf->state.render.attachments.stencil.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
            /* If both aspects are requested, depth and stencil must come
             * from the same image view.
             */
            assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
            view = cmdbuf->state.render.attachments.stencil.iview;
            layout = cmdbuf->state.render.attachments.stencil.layout;
         }
      }

      /* Nothing bound for the requested aspect(s): nothing to clear. */
      if (!view)
         continue;

      for (uint32_t j = 0; j < rectCount; j++) {
         D3D12_RECT rect;
         dzn_translate_rect(&rect, &pRects[j].rect);

         /* With multiview enabled, clear each view layer selected by the
          * view mask individually; otherwise clear the rect's layer range
          * in one go.
          */
         uint32_t view_mask = cmdbuf->state.multiview.view_mask;
         if (view_mask != 0) {
            u_foreach_bit(layer, view_mask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                               &pAttachments[i].clearValue,
                                               pAttachments[i].aspectMask,
                                               pRects[j].baseArrayLayer + layer,
                                               pRects[j].layerCount,
                                               1, &rect);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                            &pAttachments[i].clearValue,
                                            pAttachments[i].aspectMask,
                                            pRects[j].baseArrayLayer,
                                            pRects[j].layerCount,
                                            1, &rect);
         }
      }
   }
}
4609 
4610 static D3D12_RESOLVE_MODE
dzn_get_resolve_mode(VkResolveModeFlags mode)4611 dzn_get_resolve_mode(VkResolveModeFlags mode)
4612 {
4613    switch (mode) {
4614    case VK_RESOLVE_MODE_AVERAGE_BIT: return D3D12_RESOLVE_MODE_AVERAGE;
4615    case VK_RESOLVE_MODE_MAX_BIT: return D3D12_RESOLVE_MODE_MAX;
4616    case VK_RESOLVE_MODE_MIN_BIT: return D3D12_RESOLVE_MODE_MIN;
4617    /* TODO */
4618    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return D3D12_RESOLVE_MODE_MIN;
4619    default: return D3D12_RESOLVE_MODE_AVERAGE;
4620    }
4621 }
4622 
/* Resolve a multisampled rendering attachment into its resolve target by
 * drawing a full-screen blit per mip level, instead of using the fixed
 * function D3D12 resolve path.
 */
static void
dzn_cmd_buffer_resolve_rendering_attachment_via_blit(struct dzn_cmd_buffer *cmdbuf,
                                                     const struct dzn_rendering_attachment *att,
                                                     VkImageAspectFlagBits aspect,
                                                     const VkImageSubresourceRange *src_range,
                                                     const VkImageSubresourceRange *dst_range)
{
   /* One descriptor per aspect, per mip, per layer. */
   uint32_t desc_count = util_bitcount(aspect) * src_range->levelCount * src_range->layerCount;

   struct dzn_descriptor_heap *heap;
   uint32_t heap_slot;
   struct dzn_descriptor_heap *sampler_heap = NULL;
   uint32_t sampler_heap_slot = 0;
   VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
                                                        D3D12_FILTER_MIN_MAG_MIP_POINT,
                                                        &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
   if (result != VK_SUCCESS)
      return;

   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

   /* Single region reused across the mip loop below; per-level fields
    * (mipLevel, extent) are rewritten each iteration.
    */
   VkImageResolve2 region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
      .srcSubresource = {
         .aspectMask = aspect,
         .baseArrayLayer = src_range->baseArrayLayer,
         .layerCount = src_range->layerCount,
      },
      .dstSubresource = {
         .aspectMask = aspect,
         .baseArrayLayer = dst_range->baseArrayLayer,
         .layerCount = dst_range->layerCount,
      },
   };
   VkResolveImageInfo2 resolve_info = {
      .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2,
      .srcImage = vk_image_to_handle(att->iview->vk.image),
      .dstImage = vk_image_to_handle(att->resolve.iview->vk.image),
      .srcImageLayout = att->layout,
      .dstImageLayout = att->resolve.layout,
      .regionCount = 1,
      .pRegions = &region
   };
   for (uint32_t level = 0; level < src_range->levelCount; ++level) {
      region.srcSubresource.mipLevel = level + src_range->baseMipLevel;
      region.dstSubresource.mipLevel = level + dst_range->baseMipLevel;
      /* Resolve extent is the source image's extent minified to this mip. */
      region.extent = (VkExtent3D){
         u_minify(att->iview->vk.image->extent.width, region.srcSubresource.mipLevel),
         u_minify(att->iview->vk.image->extent.height, region.srcSubresource.mipLevel),
         u_minify(att->iview->vk.image->extent.depth, region.srcSubresource.mipLevel),
      };
      dzn_cmd_buffer_resolve_region(cmdbuf, &resolve_info, att->resolve.mode, heap, &heap_slot, sampler_heap, sampler_heap_slot, 0);
   }

   /* The blit-based resolve clobbered pipeline/viewport/scissor state;
    * force it to be re-applied before the next user draw.
    */
   cmdbuf->state.pipeline = NULL;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }
}
4684 
/* Resolve a rendering attachment into its resolve target at the end of a
 * render pass. Uses the fixed-function D3D12 resolve path when possible,
 * falling back to a shader-based blit otherwise.
 */
static void
dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
                                            const struct dzn_rendering_attachment *att,
                                            VkImageAspectFlagBits aspect,
                                            bool force_blit_resolve)
{
   struct dzn_image_view *src = att->iview;
   struct dzn_image_view *dst = att->resolve.iview;

   /* Nothing to do without both views and an actual resolve mode. */
   if (!src || !dst || att->resolve.mode == VK_RESOLVE_MODE_NONE)
      return;

   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
   struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);

   /* Clamp level/layer counts to what both views can cover. */
   VkImageSubresourceRange src_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = src->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = src->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };
   /* 3D resources have a single array layer as far as D3D12 subresource
    * accounting is concerned.
    */
   if (src_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      src_range.baseArrayLayer = 0;
      src_range.layerCount = 1;
   }

   VkImageSubresourceRange dst_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = dst->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = dst->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };
   if (dst_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      dst_range.baseArrayLayer = 0;
      dst_range.layerCount = 1;
   }

   if (force_blit_resolve ||
       /* Resolve modes other than average are poorly tested / buggy */
       att->resolve.mode != VK_RESOLVE_MODE_AVERAGE_BIT ||
       /* D3D resolve API can't go from (e.g.) D32S8X24 to D32 */
       src->vk.view_format != dst->vk.view_format) {
      dzn_cmd_buffer_resolve_rendering_attachment_via_blit(cmdbuf, att, aspect, &src_range, &dst_range);
      return;
   }

   VkImageLayout src_layout = att->layout;
   VkImageLayout dst_layout = att->resolve.layout;

   /* Transition source/dest into the states the resolve op requires.
    * Enhanced barriers track layouts; the legacy path tracks resource
    * states.
    */
   D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect, cmdbuf->type);
   D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect, cmdbuf->type);
   D3D12_BARRIER_LAYOUT src_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
      src_needed_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ;
   D3D12_BARRIER_LAYOUT dst_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
      dst_needed_layout = D3D12_BARRIER_LAYOUT_RESOLVE_DEST;
   if (cmdbuf->enhanced_barriers) {
      src_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, src_img,
                                                         src_layout, src_needed_layout,
                                                         &src_range);
      dst_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst_img,
                                                         dst_layout, dst_needed_layout,
                                                         &dst_range);
   } else {
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                        src_state,
                                                        D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                        dst_state,
                                                        D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Resolve each (level, layer) subresource pair individually. */
   for (uint32_t level = 0; level < src_range.levelCount; level++) {
      for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
         uint32_t src_subres =
            dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
         uint32_t dst_subres =
            dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);

         DXGI_FORMAT format =
            dzn_image_get_dxgi_format(pdev, dst->vk.format,
                                      dst->vk.usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                      aspect);

         /* ResolveSubresourceRegion supports explicit resolve modes but
          * needs a newer command-list interface and programmable sample
          * positions support; otherwise fall back to the basic resolve.
          */
         if (cmdbuf->cmdlist8 &&
             pdev->options2.ProgrammableSamplePositionsTier > D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED) {
            ID3D12GraphicsCommandList8_ResolveSubresourceRegion(cmdbuf->cmdlist8,
                                                                dst_img->res, dst_subres,
                                                                0, 0,
                                                                src_img->res, src_subres,
                                                                NULL,
                                                                format,
                                                                dzn_get_resolve_mode(att->resolve.mode));
         } else {
            ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
                                                          dst_img->res, dst_subres,
                                                          src_img->res, src_subres,
                                                          format);
         }
      }
   }

   /* Restore the original layouts/states. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_restore_layout(cmdbuf, src_img,
                                    D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_SOURCE,
                                    src_needed_layout, src_restore_layout,
                                    &src_range);
      dzn_cmd_buffer_restore_layout(cmdbuf, dst_img,
                                    D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_DEST,
                                    dst_needed_layout, dst_restore_layout,
                                    &dst_range);
   } else {
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                        D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                        src_state,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                        D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                        dst_state,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
   }
}
4814 
/* Perform the initial layout transition requested via
 * VkRenderingAttachmentInitialLayoutInfoMESA when beginning rendering on an
 * attachment. No-op if the extension struct or the image view is absent.
 */
static void
dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
                                            const VkRenderingAttachmentInfo *att,
                                            VkImageAspectFlagBits aspect)
{
   const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
      vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
   VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

   if (!initial_layout || !iview)
      return;

   struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
   VkImageSubresourceRange range = {
      .aspectMask = aspect,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };
   /* 3D resources count as a single array layer for subresource purposes. */
   if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      range.baseArrayLayer = 0;
      range.layerCount = 1;
   }

   if (cmdbuf->enhanced_barriers) {
      /* UNDEFINED initial layout means the old contents can be discarded,
       * so no prior work needs to be synchronized against.
       */
      D3D12_BARRIER_SYNC sync_before = D3D12_BARRIER_SYNC_ALL;
      D3D12_BARRIER_ACCESS access_before = D3D12_BARRIER_ACCESS_COMMON;
      if (initial_layout->initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
         sync_before = D3D12_BARRIER_SYNC_NONE;
         access_before = D3D12_BARRIER_ACCESS_NO_ACCESS;
      }

      D3D12_BARRIER_LAYOUT layout_before = dzn_vk_layout_to_d3d_layout(initial_layout->initialLayout, cmdbuf->type, aspect);
      D3D12_BARRIER_LAYOUT layout_after = dzn_vk_layout_to_d3d_layout(att->imageLayout, cmdbuf->type, aspect);
      /* Simultaneous-access resources don't use layouts; leave both sides
       * undefined so only the sync/access portion of the barrier applies.
       */
      if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) {
         layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED;
         layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED;
      }

      dzn_cmd_buffer_image_barrier(cmdbuf, image,
                                   sync_before, D3D12_BARRIER_SYNC_DRAW,
                                   access_before, D3D12_BARRIER_ACCESS_COMMON,
                                   layout_before,
                                   layout_after,
                                   &range);
   } else {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                         initial_layout->initialLayout,
                                                         att->imageLayout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
4868 
/* vkCmdBeginRendering() implementation: records the dynamic-rendering state
 * (render area, attachments, multiview info) on the command buffer, binds the
 * color/depth-stencil descriptors on the D3D12 command list, and performs the
 * requested LOAD_OP_CLEAR clears (unless this pass is resuming a suspended
 * one, in which case the clears already happened).
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
                      const VkRenderingInfo *pRenderingInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   D3D12_RECT new_render_area = {
      .left = pRenderingInfo->renderArea.offset.x,
      .top = pRenderingInfo->renderArea.offset.y,
      .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
      .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
   };

   // The render area has an impact on the scissor state.
   if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      cmdbuf->state.render.area = new_render_area;
   }

   cmdbuf->state.render.flags = pRenderingInfo->flags;
   cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
   cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;

   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };

   /* Record each color attachment (view, layouts, resolve info, store op) and
    * collect its RTV handle. A VK_NULL_HANDLE attachment gets a null RTV so
    * the render-target slot stays valid without touching any image.
    */
   cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      cmdbuf->state.render.attachments.colors[i].iview = iview;
      cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
      cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.colors[i].resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.colors[i].resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;

      if (!iview) {
         rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
         continue;
      }

      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_COLOR_BIT);
   }

   /* Depth and stencil are tracked separately even though D3D12 binds them
    * through a single DSV (see the combined handling below). */
   if (pRenderingInfo->pDepthAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;

      cmdbuf->state.render.attachments.depth.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
      cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.depth.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.depth.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_DEPTH_BIT);
   }

   if (pRenderingInfo->pStencilAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;

      cmdbuf->state.render.attachments.stencil.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
      cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.stencil.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.stencil.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   /* Depth and stencil must reference the same image view when both are
    * provided (asserted below), so one DSV covers both aspects. */
   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
      struct dzn_image_view *z_iview =
         pRenderingInfo->pDepthAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
         NULL;
      struct dzn_image_view *s_iview =
         pRenderingInfo->pStencilAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
         NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      assert(!z_iview || !s_iview || z_iview == s_iview);

      if (iview) {
         struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);

         zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
      }
   }

   ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
                                                 pRenderingInfo->colorAttachmentCount,
                                                 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
                                                 false, zs_handle.ptr ? &zs_handle : NULL);

   /* Execute LOAD_OP_CLEAR for color attachments. When a view mask is active,
    * clear each enabled view layer individually; otherwise clear the whole
    * layer range. Skipped when resuming a suspended pass. */
   for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
         if (pRenderingInfo->viewMask != 0) {
            u_foreach_bit(layer, pRenderingInfo->viewMask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                               &att->clearValue,
                                               VK_IMAGE_ASPECT_COLOR_BIT, layer,
                                               1, 1, &cmdbuf->state.render.area);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                            &att->clearValue,
                                            VK_IMAGE_ASPECT_COLOR_BIT, 0,
                                            pRenderingInfo->layerCount, 1,
                                            &cmdbuf->state.render.area);
         }
      }
   }

   /* Depth/stencil LOAD_OP_CLEAR: both aspects are merged into a single clear
    * call, with the aspect mask telling which planes get cleared. */
   if ((pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) &&
       !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
      const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
      const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
      struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
      struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;

      assert(!z_iview || !s_iview || z_iview == s_iview);

      VkImageAspectFlags aspects = 0;
      VkClearValue clear_val;

      if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
         clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
         layout = z_att->imageLayout;
      }

      if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
         layout = s_att->imageLayout;
      }

      if (aspects != 0) {
         if (pRenderingInfo->viewMask != 0) {
            u_foreach_bit(layer, pRenderingInfo->viewMask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                               &clear_val, aspects, layer,
                                               1, 1, &cmdbuf->state.render.area);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                            &clear_val, aspects, 0,
                                            VK_REMAINING_ARRAY_LAYERS, 1,
                                            &cmdbuf->state.render.area);
         }
      }
   }

   /* num_views/view_mask are clamped to at least 1 so non-multiview draws
    * still iterate once over the view loop. */
   cmdbuf->state.multiview.num_views = MAX2(util_bitcount(pRenderingInfo->viewMask), 1);
   cmdbuf->state.multiview.view_mask = MAX2(pRenderingInfo->viewMask, 1);
}
5044 
5045 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndRendering(VkCommandBuffer commandBuffer)5046 dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
5047 {
5048    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5049 
5050    if (!(cmdbuf->state.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
5051       for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
5052          dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5053                                                      &cmdbuf->state.render.attachments.colors[i],
5054                                                      VK_IMAGE_ASPECT_COLOR_BIT, false);
5055       }
5056 
5057       bool separate_stencil_resolve =
5058          cmdbuf->state.render.attachments.depth.resolve.mode !=
5059          cmdbuf->state.render.attachments.stencil.resolve.mode;
5060       dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5061                                                   &cmdbuf->state.render.attachments.depth,
5062                                                   VK_IMAGE_ASPECT_DEPTH_BIT,
5063                                                   separate_stencil_resolve);
5064       dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5065                                                   &cmdbuf->state.render.attachments.stencil,
5066                                                   VK_IMAGE_ASPECT_STENCIL_BIT,
5067                                                   separate_stencil_resolve);
5068    }
5069 
5070    memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
5071 }
5072 
/* vkCmdBindPipeline() implementation: records the pipeline on the bind point
 * and, for graphics pipelines, copies every piece of state the pipeline baked
 * statically (i.e. not declared dynamic) into the command-buffer state,
 * flagging the matching dirty bits so it gets re-emitted at draw time.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
                    VkPipelineBindPoint pipelineBindPoint,
                    VkPipeline pipe)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);

   cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;

      /* Static viewports: viewport 0's dimensions also feed the sysvals
       * consumed by shaders, hence the extra SYSVALS dirty bit. */
      if (!gfx->vp.dynamic) {
         memcpy(cmdbuf->state.viewports, gfx->vp.desc,
                gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
         cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
         cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
         cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
      }

      /* Static scissors. */
      if (!gfx->scissor.dynamic) {
         memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
                gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      }

      /* Static stencil reference values (front/back). */
      if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
         cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
         cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
      }

      /* Static depth-bounds range. */
      if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
         cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
         cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
      }

      /* Static blend constants. */
      if (!gfx->blend.dynamic_constants) {
         memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
                sizeof(cmdbuf->state.blend.constants));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
      }

      /* Vertex-buffer strides come from the pipeline in D3D12, so patch the
       * recorded VB views and mark them dirty for re-binding. */
      for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
         cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];

      if (gfx->vb.count > 0)
         BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
   }
}
5126 
/* vkCmdBindDescriptorSets() implementation: records the bound sets on the
 * bind point's descriptor state and captures dynamic buffer offsets.
 *
 * Per the Vulkan spec, pDynamicOffsets supplies one offset per dynamic
 * buffer, ordered by set then by binding, which is why the pointer is
 * advanced by each set's dynamic_buffer_count as we go.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                          VkPipelineBindPoint pipelineBindPoint,
                          VkPipelineLayout layout,
                          uint32_t firstSet,
                          uint32_t descriptorSetCount,
                          const VkDescriptorSet *pDescriptorSets,
                          uint32_t dynamicOffsetCount,
                          const uint32_t *pDynamicOffsets)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);

   struct dzn_descriptor_state *desc_state =
      &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
   uint32_t dirty = 0;

   for (uint32_t i = 0; i < descriptorSetCount; i++) {
      uint32_t idx = firstSet + i;
      VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);

      /* Only flag the set slot dirty when the bound set actually changed. */
      if (desc_state->sets[idx].set != set) {
         desc_state->sets[idx].set = set;
         dirty |= DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << idx;
      }

      /* Consume this set's share of the dynamic offsets. Dynamic offsets are
       * always re-recorded (and flagged dirty), even if the set is unchanged,
       * since the offsets themselves may differ. */
      uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
      if (dynamic_buffer_count) {
         assert(dynamicOffsetCount >= dynamic_buffer_count);

         for (uint32_t j = 0; j < dynamic_buffer_count; j++)
            desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];

         dynamicOffsetCount -= dynamic_buffer_count;
         pDynamicOffsets += dynamic_buffer_count;
         dirty |= DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS;
      }
   }

   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
}
5168 
/* vkCmdSetViewport() implementation: translates the Vulkan viewports into
 * the command-buffer state and maintains the per-viewport yz_flip_mask
 * sysval the SPIR-V->DXIL lowering consumes (see DXIL_SPIRV_Z_FLIP_SHIFT).
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
                   uint32_t firstViewport,
                   uint32_t viewportCount,
                   const VkViewport *pViewports)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);

   for (uint32_t i = 0; i < viewportCount; i++) {
      uint32_t vp = i + firstViewport;

      dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);

      /* Bit (vp + DXIL_SPIRV_Z_FLIP_SHIFT) tracks an inverted depth range
       * (minDepth > maxDepth), which requires a Z flip in the shader. */
      if (pViewports[i].minDepth > pViewports[i].maxDepth)
         cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
      else
         cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);

      /* Bit vp tracks the Y flip for positive-height viewports (Vulkan and
       * D3D12 disagree on the Y axis direction for the default orientation). */
      if (pViewports[i].height > 0)
         cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
      else
         cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
   }

   /* Viewport 0's dimensions are exposed to shaders through sysvals. */
   cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
   cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;

   if (viewportCount) {
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
   }
}
5204 
5205 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetScissor(VkCommandBuffer commandBuffer,uint32_t firstScissor,uint32_t scissorCount,const VkRect2D * pScissors)5206 dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
5207                   uint32_t firstScissor,
5208                   uint32_t scissorCount,
5209                   const VkRect2D *pScissors)
5210 {
5211    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5212 
5213    for (uint32_t i = 0; i < scissorCount; i++)
5214       dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
5215 
5216    if (scissorCount)
5217       cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
5218 }
5219 
5220 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPushConstants(VkCommandBuffer commandBuffer,VkPipelineLayout layout,VkShaderStageFlags stageFlags,uint32_t offset,uint32_t size,const void * pValues)5221 dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
5222                      VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
5223                      const void *pValues)
5224 {
5225    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5226    struct dzn_cmd_buffer_push_constant_state *states[2];
5227    uint32_t num_states = 0;
5228 
5229    if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
5230       states[num_states++] = &cmdbuf->state.push_constant.gfx;
5231 
5232    if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
5233       states[num_states++] = &cmdbuf->state.push_constant.compute;
5234 
5235    for (uint32_t i = 0; i < num_states; i++) {
5236       memcpy(((char *)states[i]->values) + offset, pValues, size);
5237       states[i]->offset =
5238          states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
5239       states[i]->end = MAX2(states[i]->end, offset + size);
5240    }
5241 }
5242 
/* vkCmdDraw() implementation.
 *
 * Two complications vs a plain DrawInstanced():
 * - Triangle fans don't exist in D3D12, so the fan is lowered to an indexed
 *   triangle-list draw using a generated index buffer.
 * - Multiview without native D3D12 view instancing is emulated by replaying
 *   the draw once per view bit, updating the view_index sysval each time.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDraw(VkCommandBuffer commandBuffer,
            uint32_t vertexCount,
            uint32_t instanceCount,
            uint32_t firstVertex,
            uint32_t firstInstance)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;

   /* With native view instancing the hardware iterates views itself, so we
    * only loop once here. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;

   if (pipeline->ia.triangle_fan) {
      /* Save the app's IB view: the fan lowering binds its own index buffer. */
      D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

      VkResult result =
         dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
      if (result != VK_SUCCESS || !vertexCount)
         return;

      cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
      u_foreach_bit(view, view_mask) {
         cmdbuf->state.sysvals.gfx.view_index = view;
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
            DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
         dzn_cmd_buffer_prepare_draw(cmdbuf, true);
         /* StartIndexLocation is 0 (generated IB starts at the fan), and
          * firstVertex becomes the base-vertex offset. */
         ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
                                                firstVertex, firstInstance);
      }

      /* Restore the IB view if we modified it when lowering triangle fans. */
      if (ib_view.SizeInBytes > 0) {
         cmdbuf->state.ib.view = ib_view;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
      }
   } else {
      cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
      u_foreach_bit(view, view_mask) {
         cmdbuf->state.sysvals.gfx.view_index = view;
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
            DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
         dzn_cmd_buffer_prepare_draw(cmdbuf, false);
         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
                                          firstVertex, firstInstance);
      }
   }
}
5296 
/* vkCmdDrawIndexed() implementation.
 *
 * Like dzn_CmdDraw(), triangle fans are lowered to indexed triangle lists,
 * and multiview is emulated by replaying the draw per view when native view
 * instancing isn't used. The triangle-fan + primitive-restart combination
 * additionally falls back to the indirect-draw path (see comment below).
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                   uint32_t indexCount,
                   uint32_t instanceCount,
                   uint32_t firstIndex,
                   int32_t vertexOffset,
                   uint32_t firstInstance)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   if (pipeline->ia.triangle_fan &&
       dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
      /* The indexed+primitive-restart+triangle-fan combination is a mess,
       * since we have to walk the index buffer, skip entries with the
       * special 0xffff/0xffffffff values, and push triangle list indices
       * for the remaining values. All of this has an impact on the index
       * count passed to the draw call, which forces us to use the indirect
       * path.
       */
      struct dzn_indirect_indexed_draw_params params = {
         .index_count = indexCount,
         .instance_count = instanceCount,
         .first_index = firstIndex,
         .vertex_offset = vertexOffset,
         .first_instance = firstInstance,
      };

      /* Stage the draw parameters in an upload buffer so the indirect path
       * can consume them as if they came from the app. */
      ID3D12Resource *draw_buf;
      uint64_t offset;
      VkResult result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
                                           DZN_INTERNAL_BUF_UPLOAD,
                                           D3D12_RESOURCE_STATE_GENERIC_READ,
                                           4,
                                           &draw_buf, &offset);
      if (result != VK_SUCCESS)
         return;

      void *cpu_ptr;
      ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
      memcpy((uint8_t *)cpu_ptr + offset, &params, sizeof(params));

      ID3D12Resource_Unmap(draw_buf, 0, NULL);

      dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, offset, NULL, 0, 1, sizeof(params), true);
      return;
   }

   cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
   cmdbuf->state.sysvals.gfx.is_indexed_draw = true;

   /* Save the app's IB view: triangle-fan lowering rebinds the IB. */
   D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

   if (pipeline->ia.triangle_fan) {
      VkResult result =
         dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
      if (result != VK_SUCCESS || !indexCount)
         return;
   }

   /* With native view instancing the hardware iterates views; otherwise
    * replay the draw once per view bit. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;
   u_foreach_bit(view, view_mask) {
      cmdbuf->state.sysvals.gfx.view_index = view;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

      dzn_cmd_buffer_prepare_draw(cmdbuf, true);
      ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
                                            vertexOffset, firstInstance);
   }

   /* Restore the IB view if we modified it when lowering triangle fans. */
   if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
      cmdbuf->state.ib.view = ib_view;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }
}
5379 
5380 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)5381 dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
5382                     VkBuffer buffer,
5383                     VkDeviceSize offset,
5384                     uint32_t drawCount,
5385                     uint32_t stride)
5386 {
5387    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5388    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5389 
5390    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
5391 }
5392 
5393 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)5394 dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
5395                            VkBuffer buffer,
5396                            VkDeviceSize offset,
5397                            uint32_t drawCount,
5398                            uint32_t stride)
5399 {
5400    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5401    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5402 
5403    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
5404 }
5405 
5406 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5407 dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
5408                          VkBuffer buffer,
5409                          VkDeviceSize offset,
5410                          VkBuffer countBuffer,
5411                          VkDeviceSize countBufferOffset,
5412                          uint32_t maxDrawCount,
5413                          uint32_t stride)
5414 {
5415    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5416    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5417    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5418 
5419    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5420                                 count_buf->res, countBufferOffset,
5421                                 maxDrawCount, stride, false);
5422 }
5423 
5424 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5425 dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
5426                                 VkBuffer buffer,
5427                                 VkDeviceSize offset,
5428                                 VkBuffer countBuffer,
5429                                 VkDeviceSize countBufferOffset,
5430                                 uint32_t maxDrawCount,
5431                                 uint32_t stride)
5432 {
5433    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5434    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5435    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5436 
5437    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5438                                 count_buf->res, countBufferOffset,
5439                                 maxDrawCount, stride, true);
5440 }
5441 
5442 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets)5443 dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
5444                          uint32_t firstBinding,
5445                          uint32_t bindingCount,
5446                          const VkBuffer *pBuffers,
5447                          const VkDeviceSize *pOffsets)
5448 {
5449    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5450 
5451    if (!bindingCount)
5452       return;
5453 
5454    D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
5455 
5456    for (uint32_t i = 0; i < bindingCount; i++) {
5457       VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
5458 
5459       vbviews[firstBinding + i].BufferLocation = buf->gpuva + pOffsets[i];
5460       vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
5461    }
5462 
5463    BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
5464                     firstBinding + bindingCount - 1);
5465 }
5466 
5467 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)5468 dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
5469                        VkBuffer buffer,
5470                        VkDeviceSize offset,
5471                        VkIndexType indexType)
5472 {
5473    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5474    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5475 
5476    cmdbuf->state.ib.view.BufferLocation = buf->gpuva + offset;
5477    cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
5478    switch (indexType) {
5479    case VK_INDEX_TYPE_UINT16:
5480       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
5481       cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
5482       break;
5483    case VK_INDEX_TYPE_UINT32:
5484       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
5485       cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
5486       break;
5487    default: unreachable("Invalid index type");
5488    }
5489 
5490    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
5491 
5492    const struct dzn_graphics_pipeline *pipeline =
5493       (const struct dzn_graphics_pipeline *)cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
5494 
5495    if (pipeline &&
5496        dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
5497       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5498 }
5499 
5500 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags2 stageMask)5501 dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,
5502                    VkEvent event,
5503                    VkPipelineStageFlags2 stageMask)
5504 {
5505    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5506    VK_FROM_HANDLE(dzn_event, evt, event);
5507 
5508    if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
5509       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
5510 }
5511 
5512 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,VkEvent event,const VkDependencyInfo * pDependencyInfo)5513 dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,
5514                  VkEvent event,
5515                  const VkDependencyInfo *pDependencyInfo)
5516 {
5517    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5518    VK_FROM_HANDLE(dzn_event, evt, event);
5519 
5520    if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
5521       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
5522 }
5523 
5524 VKAPI_ATTR void VKAPI_CALL
dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,const VkDependencyInfo * pDependencyInfo)5525 dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer,
5526                    uint32_t eventCount,
5527                    const VkEvent *pEvents,
5528                    const VkDependencyInfo *pDependencyInfo)
5529 {
5530    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5531 
5532    /* Intra-command list wait is handle by this pipeline flush, which is
5533     * overkill, but that's the best we can do with the standard D3D12 barrier
5534     * API.
5535     *
5536     * Inter-command list is taken care of by the serialization done at the
5537     * ExecuteCommandList() level:
5538     * "Calling ExecuteCommandLists twice in succession (from the same thread,
5539     *  or different threads) guarantees that the first workload (A) finishes
5540     *  before the second workload (B)"
5541     *
5542     * HOST -> DEVICE signaling is ignored and we assume events are always
5543     * signaled when we reach the vkCmdWaitEvents() point.:
5544     * "Command buffers in the submission can include vkCmdWaitEvents commands
5545     *  that wait on events that will not be signaled by earlier commands in the
5546     *  queue. Such events must be signaled by the application using vkSetEvent,
5547     *  and the vkCmdWaitEvents commands that wait upon them must not be inside
5548     *  a render pass instance.
5549     *  The event must be set before the vkCmdWaitEvents command is executed."
5550     */
5551    bool flush_pipeline = false;
5552 
5553    for (uint32_t i = 0; i < eventCount; i++) {
5554       VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
5555 
5556       struct hash_entry *he =
5557          _mesa_hash_table_search(cmdbuf->events.ht, event);
5558       if (he) {
5559          enum dzn_event_state state = (uintptr_t)he->data;
5560          assert(state != DZN_EVENT_STATE_RESET);
5561          flush_pipeline = state == DZN_EVENT_STATE_SET;
5562       }
5563    }
5564 
5565    if (flush_pipeline) {
5566       if (cmdbuf->enhanced_barriers) {
5567          dzn_cmd_buffer_global_barrier(cmdbuf,
5568                                        D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
5569                                        D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON);
5570       } else {
5571          D3D12_RESOURCE_BARRIER barrier = {
5572             .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
5573             .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
5574             .UAV = {.pResource = NULL },
5575          };
5576 
5577          ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
5578       }
5579    }
5580    cmdbuf->vk.base.device->dispatch_table.CmdPipelineBarrier2(
5581       vk_command_buffer_to_handle(&cmdbuf->vk),
5582       pDependencyInfo);
5583 }
5584 
5585 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)5586 dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
5587                   VkQueryPool queryPool,
5588                   uint32_t query,
5589                   VkQueryControlFlags flags)
5590 {
5591    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5592    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5593 
5594    struct dzn_cmd_buffer_query_pool_state *state =
5595       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5596    if (!state)
5597       return;
5598 
5599    for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
5600       qpool->queries[query + i].type = dzn_query_pool_get_query_type(qpool, flags);
5601 
5602    ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
5603 
5604    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, query, cmdbuf->state.multiview.num_views);
5605    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, query, cmdbuf->state.multiview.num_views);
5606 }
5607 
5608 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query)5609 dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
5610                 VkQueryPool queryPool,
5611                 uint32_t query)
5612 {
5613    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5614    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5615 
5616    struct dzn_cmd_buffer_query_pool_state *state =
5617       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5618    if (!state)
5619       return;
5620 
5621    ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
5622 
5623    dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
5624    if (cmdbuf->state.multiview.num_views > 1)
5625       dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
5626 }
5627 
5628 VKAPI_ATTR void VKAPI_CALL
dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,VkPipelineStageFlags2 stage,VkQueryPool queryPool,uint32_t query)5629 dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
5630                        VkPipelineStageFlags2 stage,
5631                        VkQueryPool queryPool,
5632                        uint32_t query)
5633 {
5634    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5635    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5636 
5637    struct dzn_cmd_buffer_query_pool_state *state =
5638       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5639    if (!state)
5640       return;
5641 
5642    /* Execution barrier so the timestamp gets written after the pipeline flush. */
5643    D3D12_RESOURCE_BARRIER barrier = {
5644       .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
5645       .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
5646       .UAV = { .pResource = NULL },
5647    };
5648 
5649    ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
5650 
5651    for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
5652       qpool->queries[query + i].type = D3D12_QUERY_TYPE_TIMESTAMP;
5653    ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
5654 
5655    dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
5656    if (cmdbuf->state.multiview.num_views > 1)
5657       dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
5658 }
5659 
5660 
5661 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)5662 dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
5663                       VkQueryPool queryPool,
5664                       uint32_t firstQuery,
5665                       uint32_t queryCount)
5666 {
5667    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5668    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
5669    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5670 
5671    struct dzn_cmd_buffer_query_pool_state *state =
5672       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5673 
5674    if (!state)
5675       return;
5676 
5677    uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
5678 
5679    for (uint32_t q = 0; q < queryCount; q += q_step) {
5680       uint32_t q_count = MIN2(queryCount - q, q_step);
5681 
5682       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
5683                                         dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
5684                                         device->queries.refs,
5685                                         DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
5686                                         q_count * sizeof(uint64_t));
5687    }
5688 
5689    q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
5690 
5691    for (uint32_t q = 0; q < queryCount; q += q_step) {
5692       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
5693                                         dzn_query_pool_get_result_offset(qpool, firstQuery + q),
5694                                         device->queries.refs,
5695                                         DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
5696                                         qpool->query_size);
5697    }
5698 
5699    dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
5700    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
5701    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, firstQuery, queryCount);
5702 }
5703 
/* Copy results of queries [firstQuery, firstQuery + queryCount) from the
 * pool's collect buffer into dstBuffer, honoring the 64-bit/availability/
 * wait flags. Queries are first resolved into the collect buffer via
 * dzn_cmd_buffer_collect_queries().
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                            VkQueryPool queryPool,
                            uint32_t firstQuery,
                            uint32_t queryCount,
                            VkBuffer dstBuffer,
                            VkDeviceSize dstOffset,
                            VkDeviceSize stride,
                            VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);

   struct dzn_cmd_buffer_query_pool_state *qpstate =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!qpstate)
      return;

   VkResult result =
      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
   if (result != VK_SUCCESS)
      return;

   /* Fast path: a single bulk copy works when the caller wants raw 64-bit
    * results, tightly packed (stride == query_size), with no availability
    * words interleaved.
    */
   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
                   stride == qpool->query_size &&
                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
#define ALL_STATS \
        (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
   /* Pipeline-statistics queries that only expose a subset of counters need
    * per-counter copies: the D3D12 blob always contains all counters.
    */
   if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
       qpool->pipeline_statistics != ALL_STATS)
      raw_copy = false;
#undef ALL_STATS

   /* Make the collect buffer readable as a copy source before we read it. */
   if (cmdbuf->enhanced_barriers) {
      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, qpool->collect_buffer,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
      }
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   if (raw_copy) {
      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                        qpool->collect_buffer,
                                        dzn_query_pool_get_result_offset(qpool, firstQuery),
                                        dzn_query_pool_get_result_size(qpool, queryCount));
   } else {
      /* Slow path: copy each query (and optionally its availability word)
       * individually. Results are stored as 64-bit values; a `step`-byte copy
       * of the low bytes yields the 32-bit variant (little-endian).
       */
      uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);

      for (uint32_t q = 0; q < queryCount; q++) {
         uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
         uint32_t dst_counter_offset = 0;

         if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
            /* Pack only the counters the pool exposes, in bit order. */
            for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
               if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
                  continue;

               ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                 qpool->collect_buffer,
                                                 res_offset + (c * sizeof(uint64_t)),
                                                 step);
               dst_counter_offset += step;
            }
         } else {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                              qpool->collect_buffer,
                                              res_offset, step);
            dst_counter_offset += step;
         }

         /* Availability word follows the counters when requested. */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                              qpool->collect_buffer,
                                              dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
                                              step);
         }

         dstOffset += stride;
      }
   }

   /* Legacy barriers: return the collect buffer to its COPY_DEST state. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
}
5809 
/* Indirect compute dispatch: the dispatch arguments live in `buffer` at
 * `offset` and are consumed through an ExecuteIndirect() command signature.
 * The args are first staged into an internal buffer (twice — see note below)
 * so the command signature can read them in the INDIRECT_ARGUMENT state.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                        VkBuffer buffer,
                        VkDeviceSize offset)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_buffer, buf, buffer);

   /* Group counts are unknown on the CPU; zero the sysvals and mark them
    * dirty so they get re-emitted.
    */
   cmdbuf->state.sysvals.compute.group_count_x = 0;
   cmdbuf->state.sysvals.compute.group_count_y = 0;
   cmdbuf->state.sysvals.compute.group_count_z = 0;
   cmdbuf->state.sysvals.compute.base_group_x = 0;
   cmdbuf->state.sysvals.compute.base_group_y = 0;
   cmdbuf->state.sysvals.compute.base_group_z = 0;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

   dzn_cmd_buffer_prepare_dispatch(cmdbuf);

   struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
   ID3D12CommandSignature *cmdsig =
      dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   /* Internal exec buffer sized for two copies of the dispatch args. */
   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_COPY_DEST,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   /* Make the app's buffer readable as a copy source. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, buf->res,
                                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, buf->res, 0, 1,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Stage the args twice back-to-back. NOTE(review): presumably one copy
    * feeds the group-count root-constant update and the other the actual
    * dispatch, per the layout expected by the pipeline's indirect command
    * signature — confirm against dzn_compute_pipeline_get_indirect_cmd_sig().
    */
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
                                     buf->res,
                                     offset,
                                     sizeof(D3D12_DISPATCH_ARGUMENTS));
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
                                     buf->res,
                                     offset,
                                     sizeof(D3D12_DISPATCH_ARGUMENTS));

   /* Flip the staged args to the INDIRECT_ARGUMENT state before execution. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, exec_buf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
}
5882 
/* No-op: only 1.0-wide lines are supported (the wideLines feature is
 * presumably not advertised — confirm against the reported device features),
 * so any other value would be invalid API usage by the app.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
                    float lineWidth)
{
   assert(lineWidth == 1.0f);
}
5889 
5890 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,float depthBiasConstantFactor,float depthBiasClamp,float depthBiasSlopeFactor)5891 dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
5892                     float depthBiasConstantFactor,
5893                     float depthBiasClamp,
5894                     float depthBiasSlopeFactor)
5895 {
5896    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5897    struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
5898 
5899    cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
5900    cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
5901    cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
5902    cmdbuf->state.sysvals.gfx.depth_bias = depthBiasConstantFactor;
5903    if (pdev->options16.DynamicDepthBiasSupported)
5904       cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BIAS;
5905    else
5906       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5907 }
5908 
5909 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,const float blendConstants[4])5910 dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
5911                          const float blendConstants[4])
5912 {
5913    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5914 
5915    memcpy(cmdbuf->state.blend.constants, blendConstants,
5916           sizeof(cmdbuf->state.blend.constants));
5917    cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
5918 }
5919 
5920 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,float minDepthBounds,float maxDepthBounds)5921 dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
5922                       float minDepthBounds,
5923                       float maxDepthBounds)
5924 {
5925    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5926    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
5927    struct dzn_physical_device *pdev =
5928       container_of(device->vk.physical, struct dzn_physical_device, vk);
5929 
5930    if (pdev->options2.DepthBoundsTestSupported) {
5931       cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
5932       cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
5933       cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
5934    }
5935 }
5936 
5937 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t compareMask)5938 dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
5939                              VkStencilFaceFlags faceMask,
5940                              uint32_t compareMask)
5941 {
5942    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5943 
5944    if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5945       cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
5946       cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
5947    }
5948 
5949    if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5950       cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
5951       cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
5952    }
5953 
5954    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
5955    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5956 }
5957 
5958 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t writeMask)5959 dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
5960                            VkStencilFaceFlags faceMask,
5961                            uint32_t writeMask)
5962 {
5963    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5964 
5965    if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5966       cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
5967       cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
5968    }
5969 
5970    if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5971       cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
5972       cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
5973    }
5974 
5975    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
5976    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5977 }
5978 
5979 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t reference)5980 dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
5981                            VkStencilFaceFlags faceMask,
5982                            uint32_t reference)
5983 {
5984    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5985 
5986    if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
5987       cmdbuf->state.zsa.stencil_test.front.ref = reference;
5988 
5989    if (faceMask & VK_STENCIL_FACE_BACK_BIT)
5990       cmdbuf->state.zsa.stencil_test.back.ref = reference;
5991 
5992    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
5993 }
5994