/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_format.h"
#include "vk_util.h"

#include "dxil_spirv_nir.h"

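/* Emit the queued transition barriers with as few ResourceBarrier() calls as
 * possible, splitting the array around no-op entries
 * (StateBefore == StateAfter), which D3D12 considers invalid.
 */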
static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                        D3D12_RESOURCE_BARRIER *barriers,
                                        uint32_t barrier_count)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t flush_count = 0;
   for (uint32_t b = 0; b < barrier_count; b++) {
      assert(barriers[b].Transition.pResource);

      /* some layouts map to the same states, and NOP-barriers are illegal */
      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
         if (flush_count) {
            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                       &barriers[b - flush_count]);
            flush_count = 0;
         }
      } else {
         flush_count++;
      }
   }

   if (flush_count)
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                 &barriers[barrier_count - flush_count]);

   /* Set Before = After so we don't execute the same barrier twice. */
   for (uint32_t b = 0; b < barrier_count; b++)
      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
}

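/* Flush the transition barriers previously queued for a subresource range of
 * @res, if any were recorded.
 */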
static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                         ID3D12Resource *res,
                                         uint32_t first_subres,
                                         uint32_t subres_count)
{
   assert(!cmdbuf->enhanced_barriers);
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
   D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;

   if (!barriers)
      return;

   dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
}

enum dzn_queue_transition_flags {
   DZN_QUEUE_TRANSITION_FLUSH = 1 << 0,
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1,
};

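/* Queue (and optionally flush) a state transition for a contiguous range of
 * subresources. A per-resource barrier array is lazily allocated with one
 * entry per subresource (planes x mip levels x array layers), and new
 * transitions are merged with previously queued ones.
 */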
static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                         ID3D12Resource *res,
                                         uint32_t first_subres,
                                         uint32_t subres_count,
                                         D3D12_RESOURCE_STATES before,
                                         D3D12_RESOURCE_STATES after,
                                         uint32_t flags)
{
   assert(!cmdbuf->enhanced_barriers);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
   struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;

   if (!barriers) {
      D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
      D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
      ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
      uint32_t barrier_count =
         fmt_info.PlaneCount *
         desc.MipLevels * desc.DepthOrArraySize;

      barriers =
         vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
                   8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!barriers)
         return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);

      he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
      if (!he)
         return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
      if (!barriers[subres].Transition.pResource) {
         barriers[subres] = (D3D12_RESOURCE_BARRIER) {
            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
            .Flags = 0,
            .Transition = {
               .pResource = res,
               .Subresource = subres,
               .StateBefore = before,
               .StateAfter = after,
            },
         };
      } else {
         if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
            before = barriers[subres].Transition.StateAfter;

         assert(barriers[subres].Transition.StateAfter == before ||
                barriers[subres].Transition.StateAfter == after);
         barriers[subres].Transition.StateAfter = after;
      }
   }

   if (flags & DZN_QUEUE_TRANSITION_FLUSH)
      dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);

   return VK_SUCCESS;
}

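/* Queue state transitions for every subresource covered by @range, coalescing
 * consecutive subresource indices into a single call to
 * dzn_cmd_buffer_queue_transition_barriers().
 */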
static VkResult
dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
                                                  const struct dzn_image *image,
                                                  const VkImageSubresourceRange *range,
                                                  D3D12_RESOURCE_STATES before,
                                                  D3D12_RESOURCE_STATES after,
                                                  uint32_t flags)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t first_barrier = 0, barrier_count = 0;
   VkResult ret = VK_SUCCESS;

   dzn_foreach_aspect(aspect, range->aspectMask) {
      uint32_t layer_count = dzn_get_layer_count(image, range);
      uint32_t level_count = dzn_get_level_count(image, range);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
         if (!barrier_count) {
            first_barrier = subres;
            barrier_count = level_count;
            continue;
         } else if (first_barrier + barrier_count == subres) {
            barrier_count += level_count;
            continue;
         }

         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;

         barrier_count = 0;
      }

      if (barrier_count) {
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;
      }
   }

   return VK_SUCCESS;
}

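/* Same as dzn_cmd_buffer_queue_image_range_state_transition(), but the
 * before/after states are derived from Vulkan image layouts, per aspect.
 * UNDEFINED/PREINITIALIZED map to D3D12_RESOURCE_STATE_COMMON.
 */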
static VkResult
dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
                                                   const struct dzn_image *image,
                                                   const VkImageSubresourceRange *range,
                                                   VkImageLayout old_layout,
                                                   VkImageLayout new_layout,
                                                   uint32_t flags)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t first_barrier = 0, barrier_count = 0;
   VkResult ret = VK_SUCCESS;

   if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
      flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;

   dzn_foreach_aspect(aspect, range->aspectMask) {
      D3D12_RESOURCE_STATES after =
         dzn_image_layout_to_state(image, new_layout, aspect, cmdbuf->type);
      D3D12_RESOURCE_STATES before =
         (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
         D3D12_RESOURCE_STATE_COMMON :
         dzn_image_layout_to_state(image, old_layout, aspect, cmdbuf->type);

      uint32_t layer_count = dzn_get_layer_count(image, range);
      uint32_t level_count = dzn_get_level_count(image, range);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
         if (!barrier_count) {
            first_barrier = subres;
            barrier_count = level_count;
            continue;
         } else if (first_barrier + barrier_count == subres) {
            barrier_count += level_count;
            continue;
         }

         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;

         barrier_count = 0;
      }

      if (barrier_count) {
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;
      }
   }

   return VK_SUCCESS;
}

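/* Enhanced-barriers path: emit a single global barrier covering all
 * resources. Requires an ID3D12GraphicsCommandList8.
 */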
static void
dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer *cmdbuf,
                              D3D12_BARRIER_SYNC sync_before,
                              D3D12_BARRIER_SYNC sync_after,
                              D3D12_BARRIER_ACCESS access_before,
                              D3D12_BARRIER_ACCESS access_after)
{
   assert(cmdbuf->enhanced_barriers);
   D3D12_GLOBAL_BARRIER global = {
      .SyncBefore = sync_before,
      .SyncAfter = sync_after,
      .AccessBefore = access_before,
      .AccessAfter = access_after,
   };
   D3D12_BARRIER_GROUP group = {
      .Type = D3D12_BARRIER_TYPE_GLOBAL,
      .NumBarriers = 1,
      .pGlobalBarriers = &global,
   };
   ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
}

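/* Enhanced-barriers path: emit a buffer barrier covering the whole buffer. */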
static void
dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer *cmdbuf,
                              ID3D12Resource *buf,
                              D3D12_BARRIER_SYNC sync_before,
                              D3D12_BARRIER_SYNC sync_after,
                              D3D12_BARRIER_ACCESS access_before,
                              D3D12_BARRIER_ACCESS access_after)
{
   assert(cmdbuf->enhanced_barriers);
   D3D12_BUFFER_BARRIER buffer = {
      .SyncBefore = sync_before,
      .SyncAfter = sync_after,
      .AccessBefore = access_before,
      .AccessAfter = access_after,
      .pResource = buf,
      .Offset = 0,
      .Size = UINT64_MAX,
   };
   D3D12_BARRIER_GROUP group = {
      .Type = D3D12_BARRIER_TYPE_BUFFER,
      .NumBarriers = 1,
      .pBufferBarriers = &buffer,
   };
   ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
}

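/* Enhanced-barriers path: emit a texture barrier for the given subresource
 * range. Plane indices are derived from the aspect mask: stencil-only maps to
 * plane 1, depth+stencil to planes 0-1.
 */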
static void
dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer *cmdbuf,
                             const struct dzn_image *image,
                             D3D12_BARRIER_SYNC sync_before,
                             D3D12_BARRIER_SYNC sync_after,
                             D3D12_BARRIER_ACCESS access_before,
                             D3D12_BARRIER_ACCESS access_after,
                             D3D12_BARRIER_LAYOUT layout_before,
                             D3D12_BARRIER_LAYOUT layout_after,
                             const VkImageSubresourceRange *range)
{
   assert(cmdbuf->enhanced_barriers);
   uint32_t first_plane = (range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
   uint32_t plane_count = first_plane == 0 && (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 2 : 1;
   D3D12_TEXTURE_BARRIER texture = {
      .SyncBefore = sync_before,
      .SyncAfter = sync_after,
      .AccessBefore = access_before,
      .AccessAfter = access_after,
      .LayoutBefore = layout_before,
      .LayoutAfter = layout_after,
      .Subresources.FirstArraySlice = range->baseArrayLayer,
      .Subresources.NumArraySlices = dzn_get_layer_count(image, range),
      .Subresources.IndexOrFirstMipLevel = range->baseMipLevel,
      .Subresources.NumMipLevels = dzn_get_level_count(image, range),
      .Subresources.FirstPlane = first_plane,
      .Subresources.NumPlanes = plane_count,
      .pResource = image->res,
   };
   D3D12_BARRIER_GROUP group = {
      .Type = D3D12_BARRIER_TYPE_TEXTURE,
      .NumBarriers = 1,
      .pTextureBarriers = &texture,
   };
   ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
}

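/* Transition an image range to @needed_layout when its current Vulkan layout
 * maps to a different D3D12 layout, and return the layout to restore
 * afterwards with dzn_cmd_buffer_restore_layout().
 */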
static D3D12_BARRIER_LAYOUT
dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer *cmdbuf,
                              const struct dzn_image *image,
                              VkImageLayout current_layout,
                              D3D12_BARRIER_LAYOUT needed_layout,
                              const VkImageSubresourceRange *range)
{
   assert(cmdbuf->enhanced_barriers);
   /* We shouldn't need these fixups on a subresource range which includes depth and stencil,
    * where one is read-only and the other is writable */
   if (range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      assert(current_layout != VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL &&
             current_layout != VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL);
   }

   /* Nothing needs to be done for these, the appropriate sync/access was already handled */
   if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
      return needed_layout;

   D3D12_BARRIER_LAYOUT current_d3d_layout = dzn_vk_layout_to_d3d_layout(current_layout, cmdbuf->type, range->aspectMask);
   if (current_d3d_layout != needed_layout) {
      dzn_cmd_buffer_image_barrier(cmdbuf, image,
                                   D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
                                   D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON,
                                   current_d3d_layout, needed_layout, range);
   }
   return current_d3d_layout;
}

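/* Counterpart of dzn_cmd_buffer_require_layout(): transition the range back
 * to the layout it had before the temporary transition, if they differ.
 */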
static void
dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer *cmdbuf,
                              const struct dzn_image *image,
                              D3D12_BARRIER_SYNC sync,
                              D3D12_BARRIER_ACCESS access,
                              D3D12_BARRIER_LAYOUT needed_layout,
                              D3D12_BARRIER_LAYOUT restore_layout,
                              const VkImageSubresourceRange *range)
{
   if (needed_layout != restore_layout) {
      dzn_cmd_buffer_image_barrier(cmdbuf, image,
                                   sync, D3D12_BARRIER_SYNC_COPY,
                                   access, D3D12_BARRIER_ACCESS_COMMON,
                                   needed_layout, restore_layout, range);
   }
}

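/* Release all D3D12 objects, internal buffers, descriptor-heap pools and
 * hash tables owned by the command buffer.
 */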
static void
dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
{
   if (!cbuf)
      return;

   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   if (cmdbuf->cmdlist)
      ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);

   if (cmdbuf->cmdlist8)
      ID3D12GraphicsCommandList8_Release(cmdbuf->cmdlist8);

   if (cmdbuf->cmdlist9)
      ID3D12GraphicsCommandList9_Release(cmdbuf->cmdlist9);

   if (cmdbuf->cmdalloc)
      ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);

   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
      list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
         list_del(&res->link);
         ID3D12Resource_Release(res->res);
         vk_free(&cbuf->pool->alloc, res);
      }
   }

   dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
   util_dynarray_fini(&cmdbuf->events.signal);
   util_dynarray_fini(&cmdbuf->queries.reset);
   util_dynarray_fini(&cmdbuf->queries.signal);

   if (cmdbuf->rtvs.ht) {
      hash_table_foreach(cmdbuf->rtvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
   }

   if (cmdbuf->dsvs.ht) {
      hash_table_foreach(cmdbuf->dsvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
   }

   if (cmdbuf->events.ht)
      _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);

   if (cmdbuf->queries.ht) {
      hash_table_foreach(cmdbuf->queries.ht, he) {
         struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
         util_dynarray_fini(&qpstate->reset);
         util_dynarray_fini(&qpstate->collect);
         util_dynarray_fini(&qpstate->signal);
         util_dynarray_fini(&qpstate->zero);
         vk_free(&cbuf->pool->alloc, he->data);
      }
      _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
   }

   if (cmdbuf->transition_barriers) {
      hash_table_foreach(cmdbuf->transition_barriers, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
   }

   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&cbuf->pool->alloc, cmdbuf);
}

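/* Reset the command buffer to its initial state: drop recorded state, release
 * internal buffers, clear per-pool bookkeeping, and reset the D3D12 command
 * allocator.
 */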
static void
dzn_cmd_buffer_reset(struct vk_command_buffer *cbuf, VkCommandBufferResetFlags flags)
{
   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   /* Reset the state */
   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   cmdbuf->state.multiview.num_views = 1;
   cmdbuf->state.multiview.view_mask = 1;

   /* TODO: Return resources to the pool */
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
      list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
         list_del(&res->link);
         ID3D12Resource_Release(res->res);
         vk_free(&cmdbuf->vk.pool->alloc, res);
      }
   }
   cmdbuf->cur_upload_buf = NULL;

   util_dynarray_clear(&cmdbuf->events.signal);
   util_dynarray_clear(&cmdbuf->queries.reset);
   util_dynarray_clear(&cmdbuf->queries.signal);
   hash_table_foreach(cmdbuf->rtvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
   cmdbuf->null_rtv.ptr = 0;
   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
   hash_table_foreach(cmdbuf->dsvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
      util_dynarray_fini(&qpstate->reset);
      util_dynarray_fini(&qpstate->collect);
      util_dynarray_fini(&qpstate->signal);
      util_dynarray_fini(&qpstate->zero);
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   }
   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
   hash_table_foreach(cmdbuf->transition_barriers, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);

   if (cmdbuf->vk.state == MESA_VK_COMMAND_BUFFER_STATE_RECORDING &&
       cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);

   vk_command_buffer_reset(&cmdbuf->vk);

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
}

static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
}

static bool
dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
}

static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
}

static bool
dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
}

static const struct vk_command_buffer_ops cmd_buffer_ops = {
   .destroy = dzn_cmd_buffer_destroy,
   .reset = dzn_cmd_buffer_reset,
};

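/* Masks of the sync/access bits valid on each D3D12 command list type, used
 * to scope down translated Vulkan barriers so they pass D3D12 validation on
 * compute and copy queues.
 */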
static const D3D12_BARRIER_SYNC cmd_buffer_valid_sync[] = {
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_SYNC_VIDEO_DECODE |
                                        D3D12_BARRIER_SYNC_VIDEO_PROCESS |
                                        D3D12_BARRIER_SYNC_VIDEO_ENCODE),
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_SYNC_ALL |
                                        D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                                        D3D12_BARRIER_SYNC_RAYTRACING |
                                        D3D12_BARRIER_SYNC_COPY |
                                        D3D12_BARRIER_SYNC_EXECUTE_INDIRECT |
                                        D3D12_BARRIER_SYNC_PREDICATION |
                                        D3D12_BARRIER_SYNC_ALL_SHADING |
                                        D3D12_BARRIER_SYNC_NON_PIXEL_SHADING |
                                        D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
                                        D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW |
                                        D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
                                        D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE),
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_SYNC_ALL | D3D12_BARRIER_SYNC_COPY
};
static const D3D12_BARRIER_ACCESS cmd_buffer_valid_access[] = {
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE),
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
                                        D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
                                        D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
                                        D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
                                        D3D12_BARRIER_ACCESS_PREDICATION |
                                        D3D12_BARRIER_ACCESS_COPY_DEST |
                                        D3D12_BARRIER_ACCESS_COPY_SOURCE |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE),
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_ACCESS_COPY_SOURCE | D3D12_BARRIER_ACCESS_COPY_DEST,
};

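/* Allocate a command buffer and its D3D12 backing objects: a command
 * allocator and command list (primary level only), descriptor-heap pools,
 * and the hash tables used for RTV/DSV caching, events, queries and
 * transition barriers.
 */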
static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
                      VkCommandBuffer *out)
{
   VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
   struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   assert(pool->queue_family_index < pdev->queue_family_count);

   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;

   struct dzn_cmd_buffer *cmdbuf =
      vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      vk_command_buffer_init(pool, &cmdbuf->vk, &cmd_buffer_ops, info->level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->alloc, cmdbuf);
      return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   cmdbuf->state.multiview.num_views = 1;
   cmdbuf->state.multiview.view_mask = 1;
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket)
      list_inithead(&cmdbuf->internal_bufs[bucket]);
   util_dynarray_init(&cmdbuf->events.signal, NULL);
   util_dynarray_init(&cmdbuf->queries.reset, NULL);
   util_dynarray_init(&cmdbuf->queries.signal, NULL);
   dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                 true, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                 true, &pool->alloc);

   cmdbuf->events.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->queries.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->transition_barriers =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->rtvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_rtv_key_hash_function,
                              dzn_cmd_buffer_rtv_key_equals_function);
   cmdbuf->dsvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_dsv_key_hash_function,
                              dzn_cmd_buffer_dsv_key_equals_function);
   if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
       !cmdbuf->transition_barriers ||
       !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
                                                      &IID_ID3D12CommandAllocator,
                                                      (void **)&cmdbuf->cmdalloc))) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      if (FAILED(ID3D12Device4_CreateCommandList1(device->dev, 0, type,
                                                  D3D12_COMMAND_LIST_FLAG_NONE,
                                                  &IID_ID3D12GraphicsCommandList1,
                                                  (void **)&cmdbuf->cmdlist))) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList8, (void **)&cmdbuf->cmdlist8);
      (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList9, (void **)&cmdbuf->cmdlist9);
   }

   cmdbuf->type = type;
   cmdbuf->valid_sync = cmd_buffer_valid_sync[type];
   cmdbuf->valid_access = cmd_buffer_valid_access[type];
   cmdbuf->enhanced_barriers = pdev->options12.EnhancedBarriersSupported;

out:
   if (result != VK_SUCCESS)
      dzn_cmd_buffer_destroy(&cmdbuf->vk);
   else
      *out = dzn_cmd_buffer_to_handle(cmdbuf);

   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,
                           const VkCommandBufferAllocateInfo *pAllocateInfo,
                           VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(dzn_device, dev, device);
   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      result = dzn_cmd_buffer_create(pAllocateInfo,
                                     &pCommandBuffers[i]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
                                                i, pCommandBuffers);
      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
         pCommandBuffers[i] = VK_NULL_HANDLE;
   }

   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   vk_command_buffer_begin(&cmdbuf->vk, info);
   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12GraphicsCommandList1_Reset(cmdbuf->cmdlist, cmdbuf->cmdalloc, NULL);
   return vk_command_buffer_get_record_result(&cmdbuf->vk);
}

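/* Flush the per-command-buffer event hash table into the events.signal
 * dynarray, which the submit path consumes.
 */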
static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
{
   if (vk_command_buffer_has_error(&cmdbuf->vk))
      goto out;

   hash_table_foreach(cmdbuf->events.ht, he) {
      enum dzn_event_state state = (uintptr_t)he->data;

      struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
      struct dzn_cmd_event_signal *entry =
         util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);

      if (!entry) {
         vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         break;
      }

      *entry = signal;
   }

out:
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
}

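/* Helpers implementing a dynamically-sized bitset on top of util_dynarray,
 * used below to track per-query state (reset/collect/signal/zero).
 */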
static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return VK_SUCCESS;

   unsigned old_sz = array->size;
   void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
   if (!ptr)
      return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(ptr, 0, array->size - old_sz);
   return VK_SUCCESS;
}

static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
{
   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;

   if (bit < nbits)
      return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);

   return false;
}

static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
   if (result != VK_SUCCESS)
      return result;

   BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return;

   BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
}

static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
                                   struct util_dynarray *array,
                                   uint32_t bit, uint32_t count)
{
   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
   if (result != VK_SUCCESS)
      return result;

   BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
                                     struct util_dynarray *array,
                                     uint32_t bit, uint32_t count)
{
   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;

   if (!nbits)
      return;

   uint32_t end = MIN2(bit + count, nbits) - 1;

   while (bit <= end) {
      uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
      BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
      bit += subcount;
   }
}

static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_cmd_buffer_query_pool_state *state =
      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
               8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!state) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   util_dynarray_init(&state->reset, NULL);
   util_dynarray_init(&state->collect, NULL);
   util_dynarray_init(&state->signal, NULL);
   util_dynarray_init(&state->zero, NULL);
   return state;
}

static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
                                        struct dzn_cmd_buffer_query_pool_state *state)
{
   util_dynarray_fini(&state->reset);
   util_dynarray_fini(&state->collect);
   util_dynarray_fini(&state->signal);
   util_dynarray_fini(&state->zero);
   vk_free(&cmdbuf->vk.pool->alloc, state);
}

static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
                                    struct dzn_query_pool *qpool)
{
   struct dzn_cmd_buffer_query_pool_state *state = NULL;
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->queries.ht, qpool);

   if (!he) {
      state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
      if (!state)
         return NULL;

      he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
      if (!he) {
         dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
         vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   } else {
      state = he->data;
   }

   return state;
}

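/* Resolve pending query data into the resolve buffer, zero out the sections
 * belonging to non-primary multi-view views, copy the results into the
 * collect buffer and mark the corresponding availability slots.
 */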
static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
                               const struct dzn_query_pool *qpool,
                               struct dzn_cmd_buffer_query_pool_state *state,
                               uint32_t first_query,
                               uint32_t query_count)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t nbits_collect = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t nbits_zero = util_dynarray_num_elements(&state->zero, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   if (!nbits_collect && !nbits_zero)
      return VK_SUCCESS;

   query_count = MIN2(query_count, MAX2(nbits_collect, nbits_zero) - first_query);
   nbits_collect = MIN2(first_query + query_count, nbits_collect);
   nbits_zero = MIN2(first_query + query_count, nbits_zero);

   VkResult result =
      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
   if (result != VK_SUCCESS)
      return result;

   if (cmdbuf->enhanced_barriers) {
      /* A global barrier is used because both resolve_buffer and collect_buffer might have been
       * copied from recently, and it's not worth the effort to track whether that's true. */
      dzn_cmd_buffer_global_barrier(cmdbuf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
   } else {
      dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);
   }

   /* Resolve the valid query regions into the resolve buffer */
   BITSET_WORD *collect =
      util_dynarray_element(&state->collect, BITSET_WORD, 0);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits_collect);
        start < nbits_collect;
        __bitset_next_range(&start, &end, collect, nbits_collect)) {
      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
                                                  qpool->heap,
                                                  qpool->queries[start].type,
                                                  start, end - start,
                                                  qpool->resolve_buffer,
                                                  qpool->query_size * start);
   }

   /* Zero out sections of the resolve buffer that contain queries for multi-view rendering
    * for views other than the first one. */
   BITSET_WORD *zero =
      util_dynarray_element(&state->zero, BITSET_WORD, 0);
   const uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, zero, nbits_zero);
        start < nbits_zero;
        __bitset_next_range(&start, &end, zero, nbits_zero)) {
      uint32_t count = end - start;

      for (unsigned i = 0; i < count; i += step) {
         uint32_t sub_count = MIN2(step, count - i);

         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                     qpool->resolve_buffer,
                                                     dzn_query_pool_get_result_offset(qpool, start + i),
                                                     device->queries.refs,
                                                     DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
                                                     qpool->query_size * sub_count);
      }
   }

   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);

   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf,
                                    qpool->resolve_buffer,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                               qpool->collect_buffer, offset,
                                               qpool->resolve_buffer, offset,
                                               size);

   struct query_pass_data {
      struct util_dynarray *dynarray;
      BITSET_WORD *bitset;
      uint32_t count;
   } passes[] = {
      { &state->collect, collect, nbits_collect },
      { &state->zero, zero, nbits_zero }
   };
   for (uint32_t pass = 0; pass < ARRAY_SIZE(passes); ++pass) {
      BITSET_WORD *bitset = passes[pass].bitset;
      uint32_t nbits = passes[pass].count;
      for (start = first_query, end = first_query,
           __bitset_next_range(&start, &end, bitset, nbits);
           start < nbits;
           __bitset_next_range(&start, &end, bitset, nbits)) {
         uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
         uint32_t count = end - start;

         for (unsigned i = 0; i < count; i += step) {
            uint32_t sub_count = MIN2(step, count - i);

            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, start + i),
                                                        device->queries.refs,
                                                        DZN_QUERY_REFS_ALL_ONES_OFFSET,
                                                        sizeof(uint64_t) * sub_count);
         }

         dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
         dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, passes[pass].dynarray, start, count);
      }
   }

   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
   return VK_SUCCESS;
}

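/* Convert each set-bit range in @bitset_array into a
 * dzn_cmd_buffer_query_range entry appended to @ops_array.
 */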
static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
                                 struct dzn_query_pool *qpool,
                                 struct util_dynarray *bitset_array,
                                 struct util_dynarray *ops_array)
{
   BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
   uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
      struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
      struct dzn_cmd_buffer_query_range *entry =
         util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);

      if (!entry)
         return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);

      *entry = range;
   }

   return VK_SUCCESS;
}

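/* Collect outstanding queries for every query pool touched by this command
 * buffer, and record the reset/signal ranges for the submit path.
 */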
static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
{
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
      struct dzn_cmd_buffer_query_pool_state *state = he->data;
      VkResult result =
         dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      dzn_cmd_buffer_gather_events(cmdbuf);
      dzn_cmd_buffer_gather_queries(cmdbuf);
      HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
      if (FAILED(hres))
         vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   return vk_command_buffer_end(&cmdbuf->vk);
}

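/* Legacy (resource-state) barrier implementation of vkCmdPipelineBarrier2(),
 * used when enhanced barriers aren't available.
 */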
VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   bool execution_barrier =
      !info->memoryBarrierCount &&
      !info->bufferMemoryBarrierCount &&
      !info->imageMemoryBarrierCount;

   if (execution_barrier) {
      /* Execution barrier can be emulated with a NULL UAV barrier (AKA
       * pipeline flush). That's the best we can do with the standard D3D12
       * barrier API.
       */
      D3D12_RESOURCE_BARRIER barrier = {
         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
         .UAV = { .pResource = NULL },
      };

      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
   }

   /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
    * Scopes are not taken into account, but that's inherent to the current
    * D3D12 barrier API.
    */
   if (info->memoryBarrierCount) {
      D3D12_RESOURCE_BARRIER barriers[2] = { 0 };

      barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
      barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[0].UAV.pResource = NULL;
      barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
      barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[1].Aliasing.pResourceBefore = NULL;
      barriers[1].Aliasing.pResourceAfter = NULL;
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
   }

   for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
      VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
      D3D12_RESOURCE_BARRIER barrier = { 0 };

      /* UAVs are used only for storage buffers, skip all other buffers. */
      if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
         continue;

      barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
      barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barrier.UAV.pResource = buf->res;
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
   }

   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
      const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
      VK_FROM_HANDLE(dzn_image, image, ibarrier->image);

      VkImageLayout old_layout = ibarrier->oldLayout;
      VkImageLayout new_layout = ibarrier->newLayout;
      if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
          old_layout == VK_IMAGE_LAYOUT_GENERAL &&
          (ibarrier->srcAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
         old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
      if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
          new_layout == VK_IMAGE_LAYOUT_GENERAL &&
          (ibarrier->dstAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
         new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         old_layout,
                                                         new_layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

1159 /* A straightforward translation of the Vulkan sync flags to D3D sync flags */
1160 static D3D12_BARRIER_SYNC
translate_sync(VkPipelineStageFlags2 flags,bool before)1161 translate_sync(VkPipelineStageFlags2 flags, bool before)
1162 {
1163 if (!before && (flags & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT))
1164 return D3D12_BARRIER_SYNC_ALL;
1165 else if (before && (flags & VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
1166 return D3D12_BARRIER_SYNC_ALL;
1167
1168 if (flags & (VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
1169 /* Theoretically transfer should be less, but it encompasses blit
1170 * (which can be draws) and clears, so bloat it up to everything. */
1171 VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
1172 VK_PIPELINE_STAGE_2_BLIT_BIT))
1173 return D3D12_BARRIER_SYNC_ALL;
1174
1175 D3D12_BARRIER_SYNC ret = D3D12_BARRIER_SYNC_NONE;
1176 if (flags & (VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
1177 VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
1178 VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT))
1179 ret |= D3D12_BARRIER_SYNC_INDEX_INPUT;
1180 if (flags & VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT)
1181 ret |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
1182 if (flags & (VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
1183 VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
1184 VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
1185 VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
1186 VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
1187 VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT |
1188 VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT))
1189 ret |= D3D12_BARRIER_SYNC_NON_PIXEL_SHADING;
1190 if (flags & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
1191 VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR))
1192 ret |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
1193 if (flags & (VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
1194 VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT))
1195 ret |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
1196 if (flags & VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT)
1197 ret |= D3D12_BARRIER_SYNC_RENDER_TARGET;
1198 if (flags & VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)
1199 ret |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
1200 if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT)
1201 ret |= D3D12_BARRIER_SYNC_DRAW;
1202 if (flags & VK_PIPELINE_STAGE_2_COPY_BIT)
1203 ret |= D3D12_BARRIER_SYNC_COPY;
1204 if (flags & VK_PIPELINE_STAGE_2_RESOLVE_BIT)
1205 ret |= D3D12_BARRIER_SYNC_RESOLVE;
1206 if (flags & VK_PIPELINE_STAGE_2_CLEAR_BIT)
1207 ret |= D3D12_BARRIER_SYNC_RENDER_TARGET |
1208 D3D12_BARRIER_SYNC_DEPTH_STENCIL |
1209 D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
1210 if (flags & VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
1211 ret |= D3D12_BARRIER_SYNC_PREDICATION;
1212 if (flags & (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
1213 VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV))
1214 ret |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
1215 if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)
1216 ret |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
1217 if (flags & VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR)
1218 ret |= D3D12_BARRIER_SYNC_RAYTRACING;
1219 if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR)
1220 ret |= D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE;
1221
1222 return ret;
1223 }
1224
1225 /* A straightforward translation of Vulkan access to D3D access */
1226 static D3D12_BARRIER_ACCESS
translate_access(VkAccessFlags2 flags)1227 translate_access(VkAccessFlags2 flags)
1228 {
1229 D3D12_BARRIER_ACCESS ret = D3D12_BARRIER_ACCESS_COMMON;
1230 if (flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT)
1231 ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
1232 if (flags & VK_ACCESS_2_INDEX_READ_BIT)
1233 ret |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;
1234 if (flags & VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT)
1235 ret |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;
1236 if (flags & VK_ACCESS_2_UNIFORM_READ_BIT)
1237 ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;
1238 if (flags & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
1239 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
1240 ret |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1241 if (flags & VK_ACCESS_2_SHADER_READ_BIT)
1242 ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
1243 D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1244 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1245 if (flags & (VK_ACCESS_2_SHADER_WRITE_BIT |
1246 VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1247 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT))
1248 ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1249 if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT)
1250 ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1251 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1252 if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT)
1253 ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1254 D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1255 if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT)
1256 ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1257 if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)
1258 ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1259 if (flags & VK_ACCESS_2_TRANSFER_READ_BIT)
1260 ret |= D3D12_BARRIER_ACCESS_COPY_SOURCE |
1261 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1262 if (flags & VK_ACCESS_2_TRANSFER_WRITE_BIT)
1263 ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1264 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
1265 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1266 D3D12_BARRIER_ACCESS_COPY_DEST |
1267 D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1268 if (flags & VK_ACCESS_2_MEMORY_READ_BIT)
1269 ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
1270 D3D12_BARRIER_ACCESS_INDEX_BUFFER |
1271 D3D12_BARRIER_ACCESS_VERTEX_BUFFER |
1272 D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
1273 D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1274 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1275 D3D12_BARRIER_ACCESS_RENDER_TARGET |
1276 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1277 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1278 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1279 if (flags & VK_ACCESS_2_MEMORY_WRITE_BIT)
1280 ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1281 D3D12_BARRIER_ACCESS_RENDER_TARGET |
1282 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
1283 D3D12_BARRIER_ACCESS_COPY_DEST |
1284 D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1285 if (flags & (VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
1286 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
1287 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT))
1288 ret |= D3D12_BARRIER_ACCESS_STREAM_OUTPUT;
1289 if (flags & VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT)
1290 ret |= D3D12_BARRIER_ACCESS_PREDICATION;
1291 if (flags & VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)
1292 ret |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1293 if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR)
1294 ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
1295 if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)
1296 ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
1297 return ret;
1298 }
1299
1300 /* For texture barriers, D3D will validate that the access flags used are actually
1301 * things that were valid for the specified layout. Use the mask returned from here
1302 * to scope down the set of app-provided access flags to make validation happy. */
1303 static D3D12_BARRIER_ACCESS
valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)1304 valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)
1305 {
1306 switch (layout) {
1307 case D3D12_BARRIER_LAYOUT_UNDEFINED:
1308 return D3D12_BARRIER_ACCESS_NO_ACCESS;
1309 case D3D12_BARRIER_LAYOUT_COMMON:
1310 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1311 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1312 D3D12_BARRIER_ACCESS_COPY_DEST;
1313 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON:
1314 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON:
1315 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1316 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1317 D3D12_BARRIER_ACCESS_COPY_DEST |
1318 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1319 case D3D12_BARRIER_LAYOUT_GENERIC_READ:
1320 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ:
1321 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1322 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1323 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1324 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE |
1325 D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1326 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ:
1327 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE|
1328 D3D12_BARRIER_ACCESS_COPY_SOURCE;
1329 case D3D12_BARRIER_LAYOUT_RENDER_TARGET:
1330 return D3D12_BARRIER_ACCESS_RENDER_TARGET;
1331 case D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS:
1332 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS:
1333 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS:
1334 return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1335 case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE:
1336 return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1337 case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ:
1338 return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1339 case D3D12_BARRIER_LAYOUT_SHADER_RESOURCE:
1340 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE:
1341 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE:
1342 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1343 case D3D12_BARRIER_LAYOUT_COPY_SOURCE:
1344 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE:
1345 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE:
1346 return D3D12_BARRIER_ACCESS_COPY_SOURCE;
1347 case D3D12_BARRIER_LAYOUT_COPY_DEST:
1348 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST:
1349 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST:
1350 return D3D12_BARRIER_ACCESS_COPY_DEST;
1351 case D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE:
1352 return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1353 case D3D12_BARRIER_LAYOUT_RESOLVE_DEST:
1354 return D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1355 case D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE:
1356 return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1357 default:
1358 return D3D12_BARRIER_ACCESS_COMMON;
1359 }
1360 }

/* Similar to layout -> access, there's access -> sync validation too. D3D
 * doesn't like over-synchronizing if you weren't accessing a resource through
 * a relevant access bit.
 */
static D3D12_BARRIER_SYNC
adjust_sync_for_access(D3D12_BARRIER_SYNC in, D3D12_BARRIER_ACCESS access)
{
   /* NO_ACCESS must not add sync */
   if (access == D3D12_BARRIER_ACCESS_NO_ACCESS)
      return D3D12_BARRIER_SYNC_NONE;
   /* SYNC_ALL can be used with any access bits */
   if (in == D3D12_BARRIER_SYNC_ALL)
      return in;
   /* ACCESS_COMMON needs at least one sync bit */
   if (access == D3D12_BARRIER_ACCESS_COMMON)
      return in == D3D12_BARRIER_SYNC_NONE ? D3D12_BARRIER_SYNC_ALL : in;

   D3D12_BARRIER_SYNC out = D3D12_BARRIER_SYNC_NONE;
   if (access & D3D12_BARRIER_ACCESS_VERTEX_BUFFER)
      out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
                   D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_CONSTANT_BUFFER)
      out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
                   D3D12_BARRIER_SYNC_PIXEL_SHADING |
                   D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                   D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_INDEX_BUFFER)
      out |= in & D3D12_BARRIER_SYNC_INDEX_INPUT;
   if (access & D3D12_BARRIER_ACCESS_RENDER_TARGET)
      out |= in & D3D12_BARRIER_SYNC_RENDER_TARGET;
   if (access & D3D12_BARRIER_ACCESS_UNORDERED_ACCESS)
      out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
                   D3D12_BARRIER_SYNC_PIXEL_SHADING |
                   D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                   D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE)
      out |= in & (D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_DEPTH_STENCIL);
   if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ)
      out |= in & (D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_DEPTH_STENCIL);
   if (access & D3D12_BARRIER_ACCESS_SHADER_RESOURCE)
      out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
                   D3D12_BARRIER_SYNC_PIXEL_SHADING |
                   D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                   D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_STREAM_OUTPUT)
      out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
                   D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT)
      out |= in & (D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
   if (access & D3D12_BARRIER_ACCESS_PREDICATION)
      out |= in & (D3D12_BARRIER_SYNC_DRAW |
                   D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
   if (access & (D3D12_BARRIER_ACCESS_COPY_DEST | D3D12_BARRIER_ACCESS_COPY_SOURCE))
      out |= in & D3D12_BARRIER_SYNC_COPY;
   if (access & (D3D12_BARRIER_ACCESS_RESOLVE_DEST | D3D12_BARRIER_ACCESS_RESOLVE_SOURCE))
      out |= in & D3D12_BARRIER_SYNC_RESOLVE;
   if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ)
      out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                   D3D12_BARRIER_SYNC_RAYTRACING |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
                   D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
                   D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE)
      out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                   D3D12_BARRIER_SYNC_RAYTRACING |
                   D3D12_BARRIER_SYNC_ALL_SHADING |
                   D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
                   D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
                   D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
   if (access & D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE)
      out |= in & (D3D12_BARRIER_SYNC_PIXEL_SHADING |
                   D3D12_BARRIER_SYNC_ALL_SHADING);
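
   /* e.g. if the app synchronized with SYNC_COPY but the surviving access
    * mask is only ACCESS_UNORDERED_ACCESS, no compatible sync bit remains in
    * "out", and the fallback below over-syncs with SYNC_ALL instead of
    * returning SYNC_NONE (which would wrongly promise "no access"). */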
   /* SYNC_NONE means it won't be accessed, so if we can't express the app's
    * original intent here, then be conservative and over-sync.
    */
   return out ? out : D3D12_BARRIER_SYNC_ALL;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2_enhanced(VkCommandBuffer commandBuffer,
                                 const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   uint32_t num_barrier_groups = 0;
   D3D12_BARRIER_GROUP groups[3];

   /* Some input image barriers will expand into 2 outputs, and some will turn
    * into buffer barriers. Do a first pass and count how much we need to
    * allocate.
    */
   uint32_t num_image_barriers = 0;
   uint32_t num_buffer_barriers = info->bufferMemoryBarrierCount;
   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
      VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);
      bool need_separate_aspect_barriers =
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
      if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
         ++num_buffer_barriers;
      else
         num_image_barriers += need_separate_aspect_barriers ? 2 : 1;
   }
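   /* Example of the expansion (illustration only): one barrier on a linear
    * image plus one barrier moving a depth/stencil image to a mixed
    * read-only/attachment layout count as one extra buffer barrier and two
    * texture barriers (one per aspect) respectively. */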

   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, D3D12_GLOBAL_BARRIER, global_barriers, info->memoryBarrierCount);
   VK_MULTIALLOC_DECL(&ma, D3D12_BUFFER_BARRIER, buffer_barriers, num_buffer_barriers);
   VK_MULTIALLOC_DECL(&ma, D3D12_TEXTURE_BARRIER, texture_barriers, num_image_barriers);

   if (ma.size == 0)
      return;

   if (!vk_multialloc_alloc(&ma, &cmdbuf->vk.pool->alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   if (info->memoryBarrierCount) {
      groups[num_barrier_groups].NumBarriers = info->memoryBarrierCount;
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_GLOBAL;
      groups[num_barrier_groups].pGlobalBarriers = global_barriers;
      ++num_barrier_groups;
      for (uint32_t i = 0; i < info->memoryBarrierCount; ++i) {
         global_barriers[i].SyncBefore = translate_sync(info->pMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         global_barriers[i].SyncAfter = translate_sync(info->pMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         global_barriers[i].AccessBefore = global_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access;
         global_barriers[i].AccessAfter = global_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access;

         if ((global_barriers[i].AccessBefore & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE) &&
             (global_barriers[i].AccessAfter == D3D12_BARRIER_ACCESS_COMMON ||
              global_barriers[i].AccessAfter & ~(D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ))) {
            /* D3D validates against a global barrier attempting to transition
             * from depth write to something other than depth write, but this
             * is a D3D bug; it's absolutely valid to use a global barrier to
             * transition *multiple* types of accesses. The validation does say
             * that you'd need an image barrier to actually get that kind of
             * transition, which is still correct, so just remove this bit
             * under the assumption that a dedicated image barrier will be
             * submitted to do any necessary work later. */
            global_barriers[i].AccessBefore &= ~D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
         }
         if (global_barriers[i].AccessBefore == D3D12_BARRIER_ACCESS_COMMON)
            global_barriers[i].AccessAfter = D3D12_BARRIER_ACCESS_COMMON;
         global_barriers[i].SyncBefore = adjust_sync_for_access(global_barriers[i].SyncBefore, global_barriers[i].AccessBefore);
         global_barriers[i].SyncAfter = adjust_sync_for_access(global_barriers[i].SyncAfter, global_barriers[i].AccessAfter);
      }
   }

   if (num_buffer_barriers) {
      groups[num_barrier_groups].NumBarriers = num_buffer_barriers;
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_BUFFER;
      groups[num_barrier_groups].pBufferBarriers = buffer_barriers;
      ++num_barrier_groups;
      for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; ++i) {
         VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
         buffer_barriers[i].SyncBefore = translate_sync(info->pBufferMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         buffer_barriers[i].SyncAfter = translate_sync(info->pBufferMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         buffer_barriers[i].AccessBefore = buffer_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pBufferMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & buf->valid_access;
         buffer_barriers[i].AccessAfter = buffer_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pBufferMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & buf->valid_access;
         buffer_barriers[i].SyncBefore = adjust_sync_for_access(buffer_barriers[i].SyncBefore, buffer_barriers[i].AccessBefore);
         buffer_barriers[i].SyncAfter = adjust_sync_for_access(buffer_barriers[i].SyncAfter, buffer_barriers[i].AccessAfter);
         buffer_barriers[i].pResource = buf->res;
         buffer_barriers[i].Offset = 0;
         buffer_barriers[i].Size = UINT64_MAX;
      }
   }

   if (num_image_barriers) {
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_TEXTURE;
      groups[num_barrier_groups].pTextureBarriers = texture_barriers;
      groups[num_barrier_groups].NumBarriers = num_image_barriers;
      ++num_barrier_groups;
   }

   uint32_t tbar = 0;
   uint32_t bbar = info->bufferMemoryBarrierCount;
   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
      VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);

      if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) {
         /* Barriers on linear images turn into buffer barriers */
         buffer_barriers[bbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         buffer_barriers[bbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         buffer_barriers[bbar].AccessBefore = buffer_barriers[bbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & image->valid_access;
         buffer_barriers[bbar].AccessAfter = buffer_barriers[bbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & image->valid_access;
         buffer_barriers[bbar].SyncBefore = adjust_sync_for_access(buffer_barriers[bbar].SyncBefore, buffer_barriers[bbar].AccessBefore);
         buffer_barriers[bbar].SyncAfter = adjust_sync_for_access(buffer_barriers[bbar].SyncAfter, buffer_barriers[bbar].AccessAfter);
         buffer_barriers[bbar].pResource = image->res;
         buffer_barriers[bbar].Offset = 0;
         buffer_barriers[bbar].Size = UINT64_MAX;
         ++bbar;
         continue;
      }

      const VkImageSubresourceRange *range = &info->pImageMemoryBarriers[i].subresourceRange;
      const bool simultaneous_access = image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;

      bool need_separate_aspect_barriers =
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
      uint32_t num_aspects = need_separate_aspect_barriers ? 2 : 1;
      VkImageAspectFlags aspect_0_mask = need_separate_aspect_barriers ?
         (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) : VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM;
      VkImageAspectFlags aspects[] = {
         range->aspectMask & aspect_0_mask,
         range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT,
      };
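
      /* Illustration: when the aspects are split, entry 0 holds the
       * color/depth plane and entry 1 the stencil plane; each one becomes its
       * own texture barrier below, with Subresources.FirstPlane = aspect_idx. */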

      for (uint32_t aspect_idx = 0; aspect_idx < num_aspects; ++aspect_idx) {
         VkImageAspectFlags aspect = aspects[aspect_idx];
         texture_barriers[tbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         texture_barriers[tbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         const bool queue_ownership_transfer = info->pImageMemoryBarriers[i].srcQueueFamilyIndex != info->pImageMemoryBarriers[i].dstQueueFamilyIndex;
         D3D12_BARRIER_ACCESS layout_before_valid_access = ~0;
         D3D12_BARRIER_ACCESS layout_after_valid_access = ~0;
         if (simultaneous_access) {
            /* Simultaneous access textures never perform layout transitions, and can do any type of access from COMMON layout */
            texture_barriers[tbar].LayoutAfter = texture_barriers[tbar].LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED;
         } else if (queue_ownership_transfer) {
            /* For an ownership transfer, force the foreign layout to COMMON and the matching sync/access to NONE */
            assert(info->pImageMemoryBarriers[i].srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
            assert(info->pImageMemoryBarriers[i].dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
            const bool is_release = info->pImageMemoryBarriers[i].srcQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
            const bool is_acquire = info->pImageMemoryBarriers[i].dstQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
            assert(is_release ^ is_acquire);
            texture_barriers[tbar].LayoutBefore = is_acquire ?
               D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
            texture_barriers[tbar].LayoutAfter = is_release ?
               D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
            if (is_acquire) {
               texture_barriers[tbar].SyncBefore = D3D12_BARRIER_SYNC_NONE;
               texture_barriers[tbar].AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
               layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
            } else {
               texture_barriers[tbar].SyncAfter = D3D12_BARRIER_SYNC_NONE;
               texture_barriers[tbar].AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
               layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
            }
         } else {
            texture_barriers[tbar].LayoutBefore = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
            texture_barriers[tbar].LayoutAfter = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
            layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
            layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
         }

         texture_barriers[tbar].AccessBefore = texture_barriers[tbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ||
            texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].srcAccessMask) &
            cmdbuf->valid_access & image->valid_access & layout_before_valid_access;
         texture_barriers[tbar].AccessAfter = texture_barriers[tbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].dstAccessMask) &
            cmdbuf->valid_access & image->valid_access & layout_after_valid_access;

         texture_barriers[tbar].SyncBefore = adjust_sync_for_access(texture_barriers[tbar].SyncBefore, texture_barriers[tbar].AccessBefore);
         texture_barriers[tbar].SyncAfter = adjust_sync_for_access(texture_barriers[tbar].SyncAfter, texture_barriers[tbar].AccessAfter);
         texture_barriers[tbar].Subresources.FirstArraySlice = range->baseArrayLayer;
         texture_barriers[tbar].Subresources.NumArraySlices = dzn_get_layer_count(image, range);
         texture_barriers[tbar].Subresources.IndexOrFirstMipLevel = range->baseMipLevel;
         texture_barriers[tbar].Subresources.NumMipLevels = dzn_get_level_count(image, range);
         texture_barriers[tbar].Subresources.FirstPlane = aspect_idx;
         texture_barriers[tbar].Subresources.NumPlanes = util_bitcount(aspect);
         texture_barriers[tbar].pResource = image->res;
         texture_barriers[tbar].Flags = D3D12_TEXTURE_BARRIER_FLAG_NONE;
         if (texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED)
            texture_barriers[tbar].Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD;
         ++tbar;
      }
   }
   assert(bbar == num_buffer_barriers);
   assert(tbar == num_image_barriers);

   ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, num_barrier_groups, groups);

   vk_free(&cmdbuf->vk.pool->alloc, global_barriers);
}

static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
                       const struct dzn_image *image,
                       const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_dsv_key key = { image, *desc };
   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
   struct dzn_cmd_buffer_dsv_entry *dsve;

   if (!he) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;

      // TODO: error handling
      dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      dsve->key = key;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
      dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
      ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
      _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
   } else {
      dsve = he->data;
   }

   return dsve->handle;
}

static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
                       const struct dzn_image *image,
                       const D3D12_RENDER_TARGET_VIEW_DESC *desc)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_rtv_key key = { image, *desc };
   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
   struct dzn_cmd_buffer_rtv_entry *rtve;

   if (!he) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;

      // TODO: error handling
      rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      rtve->key = key;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
      rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
      ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
      he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
   } else {
      rtve = he->data;
   }

   return rtve->handle;
}

static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (!cmdbuf->null_rtv.ptr) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
      cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);

      D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
      desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
      desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
      desc.Texture2D.MipSlice = 0;
      desc.Texture2D.PlaneSlice = 0;
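      /* Note: passing a NULL resource with a concrete view desc creates a
       * D3D12 "null descriptor"; reads through it return zero and writes are
       * discarded, which is what unused render-target slots need. */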
      ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
   }

   return cmdbuf->null_rtv;
}

static D3D12_HEAP_TYPE
heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)
{
   switch (bucket) {
   case DZN_INTERNAL_BUF_UPLOAD: return D3D12_HEAP_TYPE_UPLOAD;
   case DZN_INTERNAL_BUF_DEFAULT: return D3D12_HEAP_TYPE_DEFAULT;
   default: unreachable("Invalid value");
   }
}

static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
                                  uint32_t size,
                                  enum dzn_internal_buf_bucket bucket,
                                  D3D12_RESOURCE_STATES init_state,
                                  uint64_t align,
                                  ID3D12Resource **out,
                                  uint64_t *offset)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   ID3D12Resource *res;
   *out = NULL;
   D3D12_HEAP_TYPE heap_type = heap_type_for_bucket(bucket);

   if (bucket == DZN_INTERNAL_BUF_UPLOAD && cmdbuf->cur_upload_buf) {
      uint64_t new_offset = ALIGN_POT(cmdbuf->cur_upload_buf_offset, align);
      if (cmdbuf->cur_upload_buf->size >= size + new_offset) {
         cmdbuf->cur_upload_buf_offset = new_offset + size;
         *out = cmdbuf->cur_upload_buf->res;
         *offset = new_offset;
         return VK_SUCCESS;
      }
      cmdbuf->cur_upload_buf = NULL;
      cmdbuf->cur_upload_buf_offset = 0;
   }

   uint32_t alloc_size = size;
   if (bucket == DZN_INTERNAL_BUF_UPLOAD)
      /* Walk through a 4MB upload buffer */
      alloc_size = ALIGN_POT(size, 4 * 1024 * 1024);
   else
      /* Align size on 64k (the default alignment) */
      alloc_size = ALIGN_POT(size, 64 * 1024);
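
   /* e.g. a 100 KB upload request allocates a 4 MB buffer that subsequent
    * requests sub-allocate from (the fast path above), while a 100 KB
    * default-heap request rounds up to 128 KB, the next 64 KB multiple. */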

   D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_type);
   D3D12_RESOURCE_DESC rdesc = {
      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
      .Width = alloc_size,
      .Height = 1,
      .DepthOrArraySize = 1,
      .MipLevels = 1,
      .Format = DXGI_FORMAT_UNKNOWN,
      .SampleDesc = { .Count = 1, .Quality = 0 },
      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
      .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
   };

   HRESULT hres =
      ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
                                            D3D12_HEAP_FLAG_NONE, &rdesc,
                                            init_state, NULL,
                                            &IID_ID3D12Resource,
                                            (void **)&res);
   if (FAILED(hres)) {
      return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   struct dzn_internal_resource *entry =
      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!entry) {
      ID3D12Resource_Release(res);
      return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   entry->res = res;
   entry->size = alloc_size;
   list_addtail(&entry->link, &cmdbuf->internal_bufs[bucket]);
   *out = entry->res;
   if (offset)
      *offset = 0;
   if (bucket == DZN_INTERNAL_BUF_UPLOAD) {
      cmdbuf->cur_upload_buf = entry;
      cmdbuf->cur_upload_buf_offset = size;
   }
   return VK_SUCCESS;
}

static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                     const struct dzn_image *image,
                                     VkImageLayout layout,
                                     const VkClearColorValue *color,
                                     const VkImageSubresourceRange *range,
                                     uint32_t rect_count, D3D12_RECT *rects)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
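
   /* fill_step is the smallest multiple of the 256-byte pitch alignment that
    * is also block-size aligned, e.g. a 12-byte block (R32G32B32) yields
    * fill_step = 768, so staging rows satisfy both constraints at once. */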

   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
   uint32_t res_size = max_h * row_pitch;

   assert(fill_step <= sizeof(buf));

   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
         .Footprint = {
            .Width = max_w,
            .Height = max_h,
            .Depth = 1,
            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
         },
      },
   };

   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }

   assert(dzn_get_level_count(image, range) == 1);
   uint32_t layer_count = dzn_get_layer_count(image, range);

   dzn_foreach_aspect(aspect, range->aspectMask) {
      VkImageSubresourceLayers subres = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .mipLevel = range->baseMipLevel,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = layer_count,
      };

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         D3D12_TEXTURE_COPY_LOCATION dst_loc =
            dzn_image_get_copy_loc(image, &subres, aspect, layer);

         src_loc.PlacedFootprint.Footprint.Format =
            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
            dst_loc.PlacedFootprint.Footprint.Format :
            image->desc.Format;

         for (uint32_t r = 0; r < rect_count; r++) {
            D3D12_BOX src_box = {
               .left = 0,
               .top = 0,
               .front = 0,
               .right = (UINT)(rects[r].right - rects[r].left),
               .bottom = (UINT)(rects[r].bottom - rects[r].top),
               .back = 1,
            };

            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
                                                         &dst_loc,
                                                         rects[r].left,
                                                         rects[r].top, 0,
                                                         &src_loc,
                                                         &src_box);
         }
      }
   }

   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

static VkClearColorValue
adjust_clear_color(struct dzn_physical_device *pdev,
                   VkFormat format, const VkClearColorValue *col)
{
   VkClearColorValue out = *col;

   // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
   // manually where it matters, like here, in the clear path.
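   // e.g. with native A4B4G4R4 support only the R and B channels trade
   // places, while the RGBA4 fallback needs the (R,G) and (B,A) pairs
   // swapped so each component lands in the slot the mapped format reads.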
   if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
      if (pdev->support_a4b4g4r4) {
         DZN_SWAP(float, out.float32[0], out.float32[2]);
      } else {
         DZN_SWAP(float, out.float32[0], out.float32[1]);
         DZN_SWAP(float, out.float32[2], out.float32[3]);
      }
   }

   return out;
}

static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                      const struct dzn_image *image,
                                      VkImageLayout layout,
                                      const VkClearColorValue *color,
                                      uint32_t range_count,
                                      const VkImageSubresourceRange *ranges)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   uint32_t res_size = 0;
   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);

      res_size = MAX2(res_size, h * d * row_pitch);
   }

   assert(fill_step <= sizeof(buf));

   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
      },
   };

   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);

      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            layout,
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
            VkImageSubresourceLayers subres = {
               .aspectMask = (VkImageAspectFlags)aspect,
               .mipLevel = ranges[r].baseMipLevel + lvl,
               .baseArrayLayer = ranges[r].baseArrayLayer,
               .layerCount = layer_count,
            };

            for (uint32_t layer = 0; layer < layer_count; layer++) {
               D3D12_TEXTURE_COPY_LOCATION dst_loc =
                  dzn_image_get_copy_loc(image, &subres, aspect, layer);

               src_loc.PlacedFootprint.Footprint.Format =
                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
                  dst_loc.PlacedFootprint.Footprint.Format :
                  image->desc.Format;
               src_loc.PlacedFootprint.Footprint.Width = w;
               src_loc.PlacedFootprint.Footprint.Height = h;
               src_loc.PlacedFootprint.Footprint.Depth = d;
               src_loc.PlacedFootprint.Footprint.RowPitch =
                  ALIGN_NPOT(w * blksize, fill_step);
               D3D12_BOX src_box = {
                  .left = 0,
                  .top = 0,
                  .front = 0,
                  .right = w,
                  .bottom = h,
                  .back = d,
               };

               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
                                                            &src_loc, &src_box);
            }
         }
      }

      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}

static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
                                struct dzn_image_view *view,
                                VkImageLayout layout,
                                const VkClearValue *value,
                                VkImageAspectFlags aspects,
                                uint32_t base_layer,
                                uint32_t layer_count,
                                uint32_t rect_count,
                                D3D12_RECT *rects)
{
   struct dzn_image *image =
      container_of(view->vk.image, struct dzn_image, vk);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = view->vk.base_mip_level,
      .levelCount = 1,
      .baseArrayLayer = view->vk.base_array_layer + base_layer,
      .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
                    view->vk.layer_count - base_layer : layer_count,
   };

   layer_count = vk_image_subresource_layer_count(&image->vk, &range);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   if (vk_format_is_depth_or_stencil(view->vk.format)) {
      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      if (flags != 0) {
         if (cmdbuf->enhanced_barriers) {
            restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                           layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
                                                           &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               layout,
                                                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }

         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
                                                          value->depthStencil.depth,
                                                          value->depthStencil.stencil,
                                                          rect_count, rects);

         if (cmdbuf->enhanced_barriers) {
            dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
                                          &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                               layout,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }
      }
   } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkClearColorValue color = adjust_clear_color(pdev, view->vk.format, &value->color);
      bool clear_with_cpy = false;
      float vals[4];

      if (vk_format_is_sint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.int32[i];
            if (color.int32[i] != (int32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else if (vk_format_is_uint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.uint32[i];
            if (color.uint32[i] != (uint32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else {
         for (uint32_t i = 0; i < 4; i++)
            vals[i] = color.float32[i];
      }
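
      /* e.g. an integer clear value of 16777217 (2^24 + 1) has no exact
       * 32-bit float representation, and ClearRenderTargetView() only takes
       * floats, so the copy-based path is used to preserve the raw bits. */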

      if (clear_with_cpy) {
         dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
                                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                              &value->color,
                                              &range, rect_count, rects);
      } else {
         if (cmdbuf->enhanced_barriers) {
            restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                           layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                           &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               layout,
                                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);

         if (cmdbuf->enhanced_barriers) {
            dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                          D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                          D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                          &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                               layout,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }
      }
   }
}

static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
                           const struct dzn_image *image,
                           VkImageLayout layout,
                           const VkClearColorValue *col,
                           uint32_t range_count,
                           const VkImageSubresourceRange *ranges)
{
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ||
       cmdbuf->type != D3D12_COMMAND_LIST_TYPE_DIRECT) {
      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
      return;
   }

   VkClearColorValue color = adjust_clear_color(pdev, image->vk.format, col);
   float clear_vals[4];

   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   if (util_format_is_pure_sint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.int32[c];
         if (color.int32[c] != (int32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else if (util_format_is_pure_uint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.uint32[c];
         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else {
      memcpy(clear_vals, color.float32, sizeof(clear_vals));
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      if (cmdbuf->enhanced_barriers) {
         restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                        layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                        range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         VkImageSubresourceRange view_range = *range;

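         /* 3D images are cleared through 2D RTVs that expose depth slices as
          * array layers, so the per-level "layer count" is the minified depth
          * of the level being cleared. */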
         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            view_range.baseArrayLayer = 0;
            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
      }

      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                       D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                       D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                       range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}

static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
                        const struct dzn_image *image,
                        VkImageLayout layout,
                        const VkClearDepthStencilValue *zs,
                        uint32_t range_count,
                        const VkImageSubresourceRange *ranges)
{
   assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
      D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      if (cmdbuf->enhanced_barriers) {
         restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                        layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
                                                        range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
                                                          handle, flags,
                                                          zs->depth,
                                                          zs->stencil,
                                                          0, NULL);
      }

      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                       D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                       D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
                                       range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}

static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyBufferToImageInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
   VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block-aligned offsets/extents, but Vulkan allows the extent
    * to be unaligned when it reaches the image boundary; offsets still have
    * to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(dst_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
      dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(pdev, &src_buf_loc)) {
      /* RowPitch and Offset are properly aligned, we can copy
       * the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = 0,
         .top = 0,
         .front = 0,
         .right = region.imageExtent.width,
         .bottom = region.imageExtent.height,
         .back = region.imageExtent.depth,
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
                                                   region.imageOffset.x,
                                                   region.imageOffset.y,
                                                   region.imageOffset.z,
                                                   &src_buf_loc, &src_box);
      return;
   }

   /* Copy line-by-line if things are not properly aligned. */
   D3D12_BOX src_box = {
      .top = 0,
      .front = 0,
      .bottom = blkh,
      .back = blkd,
   };
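
   /* Sketch of what the slow path handles: with a 4-byte texel format and a
    * bufferRowLength of 100 texels, rows sit 400 bytes apart, which breaks
    * D3D12's 256-byte RowPitch requirement; dzn_buffer_get_line_copy_loc()
    * re-bases each block row into an aligned footprint and returns the
    * residual texel offset in src_x, so every row is copied on its own. */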

   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t src_x;

         D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
            dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
                                         &region, &src_buf_loc,
                                         y, z, &src_x);

         src_box.left = src_x;
         src_box.right = src_x + region.imageExtent.width;
         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_img_loc,
                                                      region.imageOffset.x,
                                                      region.imageOffset.y + y,
                                                      region.imageOffset.z + z,
                                                      &src_buf_line_loc,
                                                      &src_box);
      }
   }
}

static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyImageToBufferInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
   VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block-aligned offsets/extents, but Vulkan allows the extent
    * to be unaligned when it reaches the image boundary; offsets still have
    * to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(src_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION src_img_loc =
      dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
      dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(pdev, &dst_buf_loc)) {
      /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
       * the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = (UINT)region.imageOffset.x,
         .top = (UINT)region.imageOffset.y,
         .front = (UINT)region.imageOffset.z,
         .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
         .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
         .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
                                                   0, 0, 0, &src_img_loc,
                                                   &src_box);
      return;
   }

   D3D12_BOX src_box = {
      .left = (UINT)region.imageOffset.x,
      .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
   };

   /* Copy line-by-line if things are not properly aligned. */
   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      src_box.front = region.imageOffset.z + z;
      src_box.back = src_box.front + blkd;

      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t dst_x;

         D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
            dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
                                         &region, &dst_buf_loc,
                                         y, z, &dst_x);

         src_box.top = region.imageOffset.y + y;
         src_box.bottom = src_box.top + blkh;

         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_buf_line_loc,
                                                      dst_x, 0, 0,
                                                      &src_img_loc,
                                                      &src_box);
      }
   }
}

static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
                              const VkCopyImageInfo2 *info,
                              D3D12_RESOURCE_DESC *tmp_desc,
                              D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
                              uint32_t r,
                              VkImageAspectFlagBits aspect,
                              uint32_t l)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   ID3D12Device4 *dev = device->dev;
   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkImageCopy2 region = info->pRegions[r];
   dzn_image_align_extent(src, &region.extent);

   const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
   const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
   VkFormat src_format =
      dzn_image_get_plane_format(src->vk.format, aspect);
   VkFormat dst_format =
      dzn_image_get_plane_format(dst->vk.format, aspect);

   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
   uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
   uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
   uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
   uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
   uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
   uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
   uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
   uint32_t depth = region.extent.depth;
   uint32_t dst_l = l, src_l = l;

   assert(src_subres->aspectMask == dst_subres->aspectMask);

   if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
       dst->vk.image_type == VK_IMAGE_TYPE_2D) {
      assert(src_subres->layerCount == 1);
      src_l = 0;
      src_z += l;
      depth = 1;
   } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
              dst->vk.image_type == VK_IMAGE_TYPE_3D) {
      assert(dst_subres->layerCount == 1);
      dst_l = 0;
      dst_z += l;
      depth = 1;
   } else {
      assert(src_subres->layerCount == dst_subres->layerCount);
   }
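
   /* e.g. when copying slice l of a 2D array image into a 3D image, the
    * "layer" turns into depth slice dstOffset.z + l on the 3D side and the
    * per-iteration depth collapses to 1; the 3D -> 2D direction mirrors
    * that on the source side. */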

   D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
   D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);

   D3D12_BOX src_box = {
      .left = (UINT)MAX2(region.srcOffset.x, 0),
      .top = (UINT)MAX2(region.srcOffset.y, 0),
      .front = (UINT)MAX2(src_z, 0),
      .right = (UINT)region.srcOffset.x + region.extent.width,
      .bottom = (UINT)region.srcOffset.y + region.extent.height,
      .back = (UINT)src_z + depth,
   };

   if (!tmp_loc->pResource) {
      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                   region.dstOffset.x,
                                                   region.dstOffset.y,
                                                   dst_z, &src_loc,
                                                   &src_box);
      return;
   }
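
   /* When the caller provided a temporary buffer, source and destination
    * formats disagree on block layout (typically a compressed <->
    * uncompressed pair, given the blkw/blkh/blkd rescaling below), so the
    * copy bounces through it: image -> tmp with the source footprint, then
    * tmp -> image re-interpreted with the destination footprint. */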

   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(pdev, src->vk.format, aspect);
   tmp_desc->Width = region.extent.width;
   tmp_desc->Height = region.extent.height;

   ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   tmp_loc->PlacedFootprint.Footprint.Depth = depth;

   if (r > 0 || l > 0) {
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                                  D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                  D3D12_RESOURCE_STATE_COPY_DEST,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }
   }

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);

   if (r > 0 || l > 0) {
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                                  D3D12_RESOURCE_STATE_COPY_DEST,
                                                  D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }
   }

   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(pdev, dst->vk.format, aspect);
   if (src_blkw != dst_blkw)
      tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
   if (src_blkh != dst_blkh)
      tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;

   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   if (src_blkd != dst_blkd) {
      tmp_loc->PlacedFootprint.Footprint.Depth =
         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
   } else {
      tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
   }

   D3D12_BOX tmp_box = {
      .left = 0,
      .top = 0,
      .front = 0,
      .right = tmp_loc->PlacedFootprint.Footprint.Width,
      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
   };

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                region.dstOffset.x,
                                                region.dstOffset.y,
                                                dst_z,
                                                tmp_loc, &tmp_box);
}

static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
                                     VkImage image,
                                     VkImageAspectFlagBits aspect,
                                     const VkImageSubresourceLayers *subres,
                                     struct dzn_descriptor_heap *heap,
                                     uint32_t heap_slot)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   VK_FROM_HANDLE(dzn_image, img, image);
   VkImageViewCreateInfo iview_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = image,
      .format = img->vk.format,
      .subresourceRange = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = subres->baseArrayLayer,
         .layerCount = subres->layerCount,
      },
   };

   switch (img->vk.image_type) {
   case VK_IMAGE_TYPE_1D:
      iview_info.viewType = img->vk.array_layers > 1 ?
                            VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
      break;
   case VK_IMAGE_TYPE_2D:
      iview_info.viewType = img->vk.array_layers > 1 ?
                            VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
      break;
   case VK_IMAGE_TYPE_3D:
      iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
      break;
   default:
      unreachable("Invalid type");
   }

   struct dzn_image_view iview;
   dzn_image_view_init(device, &iview, &iview_info);
   dzn_descriptor_heap_write_image_view_desc(device, heap, heap_slot, false, false, &iview);
   dzn_image_view_finish(&iview);

   D3D12_GPU_DESCRIPTOR_HANDLE handle =
      dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
   ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
}
2738
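/* Bind the destination subresource for rendering: a DSV for depth/stencil
 * aspects, an RTV otherwise. When the device can't set the stencil
 * reference from the pixel shader, stencil blits are emulated one
 * bit-plane at a time, so the destination rect is cleared to zero first
 * and each per-bit draw can then contribute its bit.
 */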
2739 static void
2740 dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
2741 struct dzn_image *img,
2742 VkImageAspectFlagBits aspect,
2743 uint32_t level, uint32_t layer,
2744 const VkOffset3D *dst_offsets)
2745 {
2746 bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
2747 VkImageSubresourceRange range = {
2748 .aspectMask = (VkImageAspectFlags)aspect,
2749 .baseMipLevel = level,
2750 .levelCount = 1,
2751 .baseArrayLayer = layer,
2752 .layerCount = 1,
2753 };
2754
2755 if (ds) {
2756 D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
2757 D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
2758 ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle);
2759
2760 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
2761 const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2762 if (!pdev->options.PSSpecifiedStencilRefSupported) {
2763 D3D12_RECT clear_rect = {
2764 .left = dst_offsets[0].x,
2765 .right = dst_offsets[1].x,
2766 .top = dst_offsets[0].y,
2767 .bottom = dst_offsets[1].y,
2768 };
2769 ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect);
2770 }
2771 }
2772 } else {
2773 D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
2774 D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
2775 ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, false, NULL);
2776 }
2777 }
2778
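/* Look up (or lazily create) the meta-blit pipeline for this operation.
 * The key encodes everything that changes the PSO: output format, sample
 * count, fragment output location and type, sampler dimensionality,
 * filtering, resolve mode and, for emulated stencil blits, the bit-plane.
 */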
2779 static void
2780 dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
2781 const struct dzn_image *src,
2782 const struct dzn_image *dst,
2783 VkImageAspectFlagBits aspect,
2784 VkFilter filter,
2785 enum dzn_blit_resolve_mode resolve_mode,
2786 uint32_t stencil_bit)
2787 {
2788 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2789 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2790 assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf);
2791 enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
2792 VkImageUsageFlags usage =
2793 vk_format_is_depth_or_stencil(dst->vk.format) ?
2794 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
2795 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
2796 struct dzn_meta_blit_key ctx_key = {
2797 .out_format = dzn_image_get_dxgi_format(pdev, dst->vk.format, usage, aspect),
2798 .samples = (uint32_t)src->vk.samples,
2799 .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
2800 FRAG_RESULT_DEPTH :
2801 aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
2802 FRAG_RESULT_STENCIL :
2803 FRAG_RESULT_DATA0),
2804 .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
2805 util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
2806 aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
2807 GLSL_TYPE_FLOAT),
2808 .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
2809 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
2810 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
2811 GLSL_SAMPLER_DIM_3D),
2812 .src_is_array = src->vk.array_layers > 1,
2813 .resolve_mode = resolve_mode,
2814 /* Filter doesn't need to be part of the key if we're not embedding a static sampler */
2815 .linear_filter = filter == VK_FILTER_LINEAR && device->support_static_samplers,
2816 .stencil_bit = stencil_bit,
2817 .padding = 0,
2818 };
2819
2820 const struct dzn_meta_blit *ctx =
2821 dzn_meta_blits_get_context(device, &ctx_key);
2822 assert(ctx);
2823
2824 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2825 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) {
2826 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig;
2827 ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
2828 }
2829 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
2830 }
2831
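/* Upload the quad coordinates for a 2D blit and program the viewport and
 * scissor. Destination offsets are mapped to NDC with x' = 2x/w - 1 and
 * y' = -(2y/h - 1); blitting the full extent of a 64x64 level thus yields
 * dst_pos = { -1, 1, 1, -1 }. Source coordinates stay in texels for
 * multisampled sources and are normalized otherwise.
 */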
2832 static void
2833 dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
2834 const struct dzn_image *src,
2835 const VkImageSubresourceLayers *src_subres,
2836 const VkOffset3D *src_offsets,
2837 const struct dzn_image *dst,
2838 const VkImageSubresourceLayers *dst_subres,
2839 const VkOffset3D *dst_offsets,
2840 bool normalize_src_coords)
2841 {
2842 uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
2843 uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
2844 uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
2845 uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
2846
2847 float dst_pos[4] = {
2848 (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
2849 (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
2850 };
2851
2852 float src_pos[4] = {
2853 (float)src_offsets[0].x, (float)src_offsets[0].y,
2854 (float)src_offsets[1].x, (float)src_offsets[1].y,
2855 };
2856
2857 if (normalize_src_coords) {
2858 src_pos[0] /= src_w;
2859 src_pos[1] /= src_h;
2860 src_pos[2] /= src_w;
2861 src_pos[3] /= src_h;
2862 }
2863
2864 float coords[] = {
2865 dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
2866 dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
2867 dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
2868 dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
2869 };
2870
2871 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);
2872
2873 D3D12_VIEWPORT vp = {
2874 .TopLeftX = 0,
2875 .TopLeftY = 0,
2876 .Width = (float)dst_w,
2877 .Height = (float)dst_h,
2878 .MinDepth = 0,
2879 .MaxDepth = 1,
2880 };
2881 ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);
2882
2883 D3D12_RECT scissor = {
2884 .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
2885 .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
2886 .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
2887 .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
2888 };
2889 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
2890 }
2891
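/* Surround the blit draws with layout transitions: before the draws
 * (post = false), move the source to a shader-readable layout and the
 * destination to a render-target/depth-write layout; after them
 * (post = true), restore the layouts the application expects.
 */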
2892 static void
2893 dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
2894 struct dzn_image *src, VkImageLayout src_layout,
2895 const VkImageSubresourceLayers *src_subres,
2896 struct dzn_image *dst, VkImageLayout dst_layout,
2897 const VkImageSubresourceLayers *dst_subres,
2898 VkImageAspectFlagBits aspect,
2899 D3D12_BARRIER_LAYOUT *restore_src_layout,
2900 D3D12_BARRIER_LAYOUT *restore_dst_layout,
2901 bool post)
2902 {
2903 VkImageSubresourceRange src_range = {
2904 .aspectMask = aspect,
2905 .baseMipLevel = src_subres->mipLevel,
2906 .levelCount = 1,
2907 .baseArrayLayer = src_subres->baseArrayLayer,
2908 .layerCount = src_subres->layerCount,
2909 };
2910 VkImageSubresourceRange dst_range = {
2911 .aspectMask = aspect,
2912 .baseMipLevel = dst_subres->mipLevel,
2913 .levelCount = 1,
2914 .baseArrayLayer = dst_subres->baseArrayLayer,
2915 .layerCount = dst_subres->layerCount,
2916 };
2917
2918 if (!post) {
2919 if (cmdbuf->enhanced_barriers) {
2920 D3D12_BARRIER_LAYOUT dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
2921 D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE : D3D12_BARRIER_LAYOUT_RENDER_TARGET;
2922 *restore_src_layout = dzn_cmd_buffer_require_layout(cmdbuf, src, src_layout,
2923 D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
2924 &src_range);
2925 *restore_dst_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst,
2926 dst_layout,
2927 dst_new_layout,
2928 &dst_range);
2929 } else {
2930 VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
2931 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
2932 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2933 src_layout,
2934 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2935 DZN_QUEUE_TRANSITION_FLUSH);
2936 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2937 dst_layout,
2938 dst_new_layout,
2939 DZN_QUEUE_TRANSITION_FLUSH);
2940 }
2941 } else {
2942 if (cmdbuf->enhanced_barriers) {
2943 dzn_cmd_buffer_restore_layout(cmdbuf, src,
2944 D3D12_BARRIER_SYNC_PIXEL_SHADING, D3D12_BARRIER_ACCESS_SHADER_RESOURCE,
2945 D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, *restore_src_layout,
2946 &src_range);
2947 if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
2948 dzn_cmd_buffer_restore_layout(cmdbuf, dst,
2949 D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
2950 D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, *restore_dst_layout,
2951 &dst_range);
2952 } else {
2953 dzn_cmd_buffer_restore_layout(cmdbuf, dst,
2954 D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
2955 D3D12_BARRIER_LAYOUT_RENDER_TARGET, *restore_dst_layout,
2956 &dst_range);
2957 }
2958 } else {
2959 VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
2960 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
2961 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2962 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2963 src_layout,
2964 DZN_QUEUE_TRANSITION_FLUSH);
2965 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2966 dst_new_layout,
2967 dst_layout,
2968 DZN_QUEUE_TRANSITION_FLUSH);
2969 }
2970 }
2971 }
2972
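/* Emulate one vkCmdBlitImage region with draws: for each aspect, bind the
 * blit pipeline and source SRV, then render one quad per destination
 * slice/layer, stepping the source z coordinate so 3D mip levels of
 * differing depths are resampled correctly.
 */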
2973 static void
2974 dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
2975 const VkBlitImageInfo2 *info,
2976 struct dzn_descriptor_heap *heap,
2977 uint32_t *heap_slot,
2978 struct dzn_descriptor_heap *sampler_heap,
2979 uint32_t sampler_heap_slot,
2980 uint32_t r)
2981 {
2982 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2983 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2984
2985 const VkImageBlit2 *region = &info->pRegions[r];
2986 bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
2987 bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
2988 const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2989 const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2990 bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
2991 uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
2992 uint32_t stencil_bit_root_param_slot = 2;
2993 assert(device->support_static_samplers == (sampler_heap == NULL));
2994
2995 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2996 D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
2997 D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
2998 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
2999 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3000 src, info->srcImageLayout, &region->srcSubresource,
3001 dst, info->dstImageLayout, &region->dstSubresource,
3002 aspect, &restore_src_layout, &restore_dst_layout, false);
3003 dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
3004 aspect, &region->srcSubresource,
3005 heap, (*heap_slot)++);
3006 dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3007 src, &region->srcSubresource, region->srcOffsets,
3008 dst, &region->dstSubresource, region->dstOffsets,
3009 src->vk.samples == 1);
3010
3011 uint32_t dst_depth =
3012 region->dstOffsets[1].z > region->dstOffsets[0].z ?
3013 region->dstOffsets[1].z - region->dstOffsets[0].z :
3014 region->dstOffsets[0].z - region->dstOffsets[1].z;
3015 uint32_t src_depth =
3016 region->srcOffsets[1].z > region->srcOffsets[0].z ?
3017 region->srcOffsets[1].z - region->srcOffsets[0].z :
3018 region->srcOffsets[0].z - region->srcOffsets[1].z;
3019
3020 uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
3021 uint32_t dst_level = region->dstSubresource.mipLevel;
3022
3023 float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
3024 if (region->srcOffsets[0].z > region->srcOffsets[1].z)
3025 src_slice_step = -src_slice_step;
3026 float src_z_coord =
3027 src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
3028 uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
3029 uint32_t dst_z_coord =
3030 dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
3031 if (region->dstOffsets[0].z > region->dstOffsets[1].z)
3032 dst_z_coord--;
3033
3034 uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
3035 1 : -1;
3036
3037 /* Normalize the src coordinates/step */
3038 if (src_is_3d) {
3039 src_z_coord /= src->vk.extent.depth;
3040 src_slice_step /= src->vk.extent.depth;
3041 }
3042
3043 for (uint32_t slice = 0; slice < slice_count; slice++) {
3044 dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets);
3045 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3046 if (!device->support_static_samplers) {
3047 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3048 stencil_bit_root_param_slot++;
3049 }
3050 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3051 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3052 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
3053 for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3054 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
3055 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3056 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3057 }
3058 } else {
3059 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3060 }
3061 src_z_coord += src_slice_step;
3062 dst_z_coord += dst_slice_step;
3063 }
3064
3065 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3066 src, info->srcImageLayout, &region->srcSubresource,
3067 dst, info->dstImageLayout, &region->dstSubresource,
3068 aspect, &restore_src_layout, &restore_dst_layout, true);
3069 }
3070 }
3071
3072 static enum dzn_blit_resolve_mode
3073 get_blit_resolve_mode(VkResolveModeFlagBits mode)
3074 {
3075 switch (mode) {
3076 case VK_RESOLVE_MODE_AVERAGE_BIT: return dzn_blit_resolve_average;
3077 case VK_RESOLVE_MODE_MIN_BIT: return dzn_blit_resolve_min;
3078 case VK_RESOLVE_MODE_MAX_BIT: return dzn_blit_resolve_max;
3079 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return dzn_blit_resolve_sample_zero;
3080 default: unreachable("Unexpected resolve mode");
3081 }
3082 }
3083
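/* Emulate one vkCmdResolveImage region with the same draw-based path as
 * blits, using a resolve-mode-specific shader variant and nearest
 * filtering (source and destination extents always match for resolves).
 */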
3084 static void
3085 dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
3086 const VkResolveImageInfo2 *info,
3087 VkResolveModeFlags mode,
3088 struct dzn_descriptor_heap *heap,
3089 uint32_t *heap_slot,
3090 struct dzn_descriptor_heap *sampler_heap,
3091 uint32_t sampler_heap_slot,
3092 uint32_t r)
3093 {
3094 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3095 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3096
3097 const VkImageResolve2 *region = &info->pRegions[r];
3098
3099 const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3100 const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3101 bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
3102 uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
3103 uint32_t stencil_bit_root_param_slot = 2;
3104 assert(device->support_static_samplers == (sampler_heap == NULL));
3105 enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode);
3106
3107 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3108 D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
3109 D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
3110 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3111 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3112 src, info->srcImageLayout, &region->srcSubresource,
3113 dst, info->dstImageLayout, &region->dstSubresource,
3114 aspect, &restore_src_layout, &restore_dst_layout, false);
3115 dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
3116 &region->srcSubresource,
3117 heap, (*heap_slot)++);
3118
3119 VkOffset3D src_offset[2] = {
3120 {
3121 .x = region->srcOffset.x,
3122 .y = region->srcOffset.y,
3123 },
3124 {
3125 .x = (int32_t)(region->srcOffset.x + region->extent.width),
3126 .y = (int32_t)(region->srcOffset.y + region->extent.height),
3127 },
3128 };
3129 VkOffset3D dst_offset[2] = {
3130 {
3131 .x = region->dstOffset.x,
3132 .y = region->dstOffset.y,
3133 },
3134 {
3135 .x = (int32_t)(region->dstOffset.x + region->extent.width),
3136 .y = (int32_t)(region->dstOffset.y + region->extent.height),
3137 },
3138 };
3139
3140 dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3141 src, &region->srcSubresource, src_offset,
3142 dst, &region->dstSubresource, dst_offset,
3143 false);
3144
3145 uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
3146 for (uint32_t layer = 0; layer < layer_count; layer++) {
3147 float src_z_coord = layer;
3148
3149 dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
3150 dst, aspect, region->dstSubresource.mipLevel,
3151 region->dstSubresource.baseArrayLayer + layer,
3152 dst_offset);
3153 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3154 if (!device->support_static_samplers) {
3155 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3156 stencil_bit_root_param_slot++;
3157 }
3158 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3159 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3160 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
3161 for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3162 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3163 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3164 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3165 }
3166 } else {
3167 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3168 }
3169 }
3170
3171 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3172 src, info->srcImageLayout, &region->srcSubresource,
3173 dst, info->dstImageLayout, &region->dstSubresource,
3174 aspect, &restore_src_layout, &restore_dst_layout, true);
3175 }
3176 }
3177
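/* Flush a pending pipeline change: rebind the root signature if it
 * changed (for bindless, the device-wide descriptor heaps must be bound
 * before the root signature), then the PSO, and finally the view-instance
 * mask.
 */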
3178 static void
3179 dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3180 {
3181 const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
3182
3183 if (!pipeline)
3184 return;
3185
3186 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3187 ID3D12PipelineState *old_pipeline_state =
3188 cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;
3189
3190 uint32_t view_instance_mask = 0;
3191 if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
3192 if (cmdbuf->state.bindpoint[bindpoint].root_sig != pipeline->root.sig) {
3193 cmdbuf->state.bindpoint[bindpoint].root_sig = pipeline->root.sig;
3194 /* Changing root signature always requires re-binding descriptor heaps */
3195 cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3196
3197 if (device->bindless) {
3198 /* Note: The D3D12 spec for descriptor heap indexing requires that the descriptor heaps
3199 * are bound *before* the root signature. */
3200 bool bind_heaps = false;
3201 dzn_foreach_pool_type(type) {
3202 if (cmdbuf->state.heaps[type] != &device->device_heaps[type].heap) {
3203 bind_heaps = true;
3204 cmdbuf->state.heaps[type] = &device->device_heaps[type].heap;
3205 }
3206 }
3207 if (bind_heaps) {
3208 ID3D12DescriptorHeap *heaps[NUM_POOL_TYPES];
3209 dzn_foreach_pool_type(type)
3210 heaps[type] = cmdbuf->state.heaps[type]->heap;
3211 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, NUM_POOL_TYPES, heaps);
3212 }
3213 }
3214
3215 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3216 ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
3217 else
3218 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
3219 }
3220 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3221 struct dzn_graphics_pipeline *gfx =
3222 (struct dzn_graphics_pipeline *)pipeline;
3223 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
3224 dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
3225 if (gfx->multiview.native_view_instancing)
3226 view_instance_mask = gfx->multiview.view_mask;
3227 else
3228 view_instance_mask = 1;
3229
3230 if (gfx->zsa.dynamic_depth_bias && gfx->use_gs_for_polygon_mode_point)
3231 cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3232 }
3233 }
3234
3235 ID3D12PipelineState *new_pipeline_state = pipeline->state;
3236
3237 if (old_pipeline_state != new_pipeline_state) {
3238 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
3239 cmdbuf->state.pipeline = pipeline;
3240 }
3241
3242 /* Deferring this until after the pipeline has been set due to an NVIDIA driver bug
3243 * when view instancing mask is set with no pipeline bound. */
3244 if (view_instance_mask)
3245 ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, view_instance_mask);
3246 }
3247
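/* Flush dirty descriptor state for one bind point. In bindless mode, root
 * SRVs are pointed straight at the descriptor pool's GPU-visible buffer;
 * otherwise descriptors are copied from the per-set staging heaps into a
 * scratch shader-visible heap and bound as root descriptor tables.
 */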
3248 static void
3249 dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3250 {
3251 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3252 struct dzn_descriptor_state *desc_state =
3253 &cmdbuf->state.bindpoint[bindpoint].desc_state;
3254 struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
3255 desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
3256 desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
3257 };
3258 uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
3259 bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
3260 const struct dzn_pipeline *pipeline =
3261 cmdbuf->state.bindpoint[bindpoint].pipeline;
3262
3263 /* The set of dirty bits that are cleared by running this function. Notably,
3264 * for bindless, descriptor sets that are bound but unused by the currently
3265 * set pipeline are not processed, meaning their dirty bits must persist
3266 * until a pipeline actually uses them. For non-bindless, all sets are
3267 * processed. */
3268 uint32_t dirty_bits_bindless =
3269 (pipeline->dynamic_buffer_count ? DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS : 0) |
3270 (((DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << pipeline->set_count) - 1) & DZN_CMD_BINDPOINT_DIRTY_DESC_SETS);
3271 uint32_t dirty_bits = (device->bindless ? dirty_bits_bindless : DZN_CMD_BINDPOINT_DIRTY_DESC_SETS | DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS);
3272 if (!(cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bits))
3273 return;
3274
3275 dzn_foreach_pool_type (type) {
3276 if (device->bindless) {
3277 new_heaps[type] = &device->device_heaps[type].heap;
3278 } else {
3279 uint32_t desc_count = pipeline->desc_count[type];
3280 if (!desc_count)
3281 continue;
3282
3283 struct dzn_descriptor_heap_pool *pool =
3284 type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
3285 &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
3286 struct dzn_descriptor_heap *dst_heap = NULL;
3287 uint32_t dst_heap_offset = 0;
3288
3289 dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
3290 &dst_heap, &dst_heap_offset);
3291 new_heap_offsets[type] = dst_heap_offset;
3292 update_root_desc_table[type] = true;
3293
3294 for (uint32_t s = 0; s < MAX_SETS; s++) {
3295 const struct dzn_descriptor_set *set = desc_state->sets[s].set;
3296 if (!set) continue;
3297
3298 uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
3299 uint32_t set_desc_count = MIN2(pipeline->sets[s].range_desc_count[type], set->heap_sizes[type]);
3300 if (set_desc_count) {
3301 dzn_descriptor_heap_copy(device, dst_heap, dst_heap_offset + set_heap_offset,
3302 &set->pool->heaps[type], set->heap_offsets[type],
3303 set_desc_count, type);
3304 }
3305
3306 if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
3307 uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
3308 for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
3309 struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
3310 if (!bdesc.buffer)
3311 continue;
3312 bdesc.offset += desc_state->sets[s].dynamic_offsets[o];
3313
3314 bool primary_is_writable = bdesc.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
3315 uint32_t desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary;
3316 dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
3317 dst_heap_offset + set_heap_offset + desc_heap_offset,
3318 primary_is_writable, &bdesc);
3319
3320 if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt != ~0) {
3321 assert(primary_is_writable);
3322 desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt;
3323 dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
3324 dst_heap_offset + set_heap_offset + desc_heap_offset,
3325 false, &bdesc);
3326 }
3327 }
3328 }
3329 }
3330
3331 new_heaps[type] = dst_heap;
3332 }
3333 }
3334
3335 if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
3336 new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
3337 ID3D12DescriptorHeap *desc_heaps[2];
3338 uint32_t num_desc_heaps = 0;
3339 if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
3340 desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
3341 if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
3342 desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
3343 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);
3344
3345 for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
3346 cmdbuf->state.heaps[h] = new_heaps[h];
3347 }
3348
3349 if (!device->bindless) {
3350 for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
3351 D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];
3352
3353 if (!update_root_desc_table[type])
3354 continue;
3355
3356 D3D12_GPU_DESCRIPTOR_HANDLE handle =
3357 dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);
3358
3359 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3360 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
3361 else
3362 ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
3363 }
3364 }
3365
3366 if (device->bindless) {
3367 for (uint32_t s = 0; s < pipeline->set_count; ++s) {
3368 const struct dzn_descriptor_set *set = desc_state->sets[s].set;
3369 if (!set || !set->pool->bindless.buf)
3370 continue;
3371
3372 uint32_t dirty_bit = DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << s;
3373 if (cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bit) {
3374 uint64_t gpuva = set->pool->bindless.gpuva + (set->heap_offsets[0] * sizeof(struct dxil_spirv_bindless_entry));
3375 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3376 ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
3377 else
3378 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
3379 }
3380 }
3381 if (pipeline->dynamic_buffer_count &&
3382 (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS)) {
3383 ID3D12Resource *dynamic_buffer_buf = NULL;
3384 uint64_t dynamic_buffer_buf_offset;
3385 VkResult result =
3386 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(struct dxil_spirv_bindless_entry) * pipeline->dynamic_buffer_count,
3387 DZN_INTERNAL_BUF_UPLOAD,
3388 D3D12_RESOURCE_STATE_GENERIC_READ,
3389 D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT,
3390 &dynamic_buffer_buf,
3391 &dynamic_buffer_buf_offset);
3392 if (result != VK_SUCCESS)
3393 return;
3394
3395 uint64_t gpuva = ID3D12Resource_GetGPUVirtualAddress(dynamic_buffer_buf) + dynamic_buffer_buf_offset;
3396 struct dxil_spirv_bindless_entry *map;
3397 ID3D12Resource_Map(dynamic_buffer_buf, 0, NULL, (void **)&map);
3398 map += (dynamic_buffer_buf_offset / sizeof(*map));
3399
3400 for (uint32_t s = 0; s < MAX_SETS; ++s) {
3401 const struct dzn_descriptor_set *set = desc_state->sets[s].set;
3402 if (!set)
3403 continue;
3404
3405 uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
3406 for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
3407 const struct dzn_buffer_desc *bdesc = &set->dynamic_buffers[o];
3408 volatile struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary];
3409 struct dzn_buffer_desc bdesc_updated = *bdesc;
3410 bdesc_updated.offset += cmdbuf->state.bindpoint[bindpoint].desc_state.sets[s].dynamic_offsets[o];
3411 dzn_buffer_get_bindless_buffer_descriptor(device, &bdesc_updated, map_entry);
3412 }
3413 }
3414
3415 ID3D12Resource_Unmap(dynamic_buffer_buf, 0, NULL);
3416 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3417 ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist,
3418 pipeline->root.dynamic_buffer_bindless_param_idx,
3419 gpuva);
3420 else
3421 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
3422 pipeline->root.dynamic_buffer_bindless_param_idx,
3423 gpuva);
3424 }
3425 }
3426
3427 cmdbuf->state.bindpoint[bindpoint].dirty &= ~dirty_bits;
3428 }
3429
3430 static void
3431 dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3432 {
3433 if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
3434 return;
3435
3436 const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
3437 uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
3438
3439 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3440 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3441 sizeof(cmdbuf->state.sysvals.gfx) / 4,
3442 &cmdbuf->state.sysvals.gfx, 0);
3443 } else {
3444 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3445 sizeof(cmdbuf->state.sysvals.compute) / 4,
3446 &cmdbuf->state.sysvals.compute, 0);
3447 }
3448
3449 cmdbuf->state.bindpoint[bindpoint].dirty &= ~DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3450 }
3451
3452 static void
3453 dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
3454 {
3455 const struct dzn_graphics_pipeline *pipeline =
3456 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3457
3458 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
3459 !pipeline->vp.count)
3460 return;
3461
3462 ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
3463 }
3464
3465 static void
3466 dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
3467 {
3468 const struct dzn_graphics_pipeline *pipeline =
3469 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3470
3471 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
3472 return;
3473
3474 if (!pipeline->scissor.count) {
3475 /* Apply a scissor delimiting the render area. */
3476 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
3477 return;
3478 }
3479
3480 D3D12_RECT scissors[MAX_SCISSOR];
3481
3482 memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
3483 for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
3484 scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
3485 scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
3486 scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
3487 scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
3488 }
3489
3490 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
3491 }
3492
3493 static void
3494 dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
3495 {
3496 unsigned start, end;
3497
3498 BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
3499 ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, &cmdbuf->state.vb.views[start]);
3500
3501 BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
3502 }
3503
3504 static void
3505 dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
3506 {
3507 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
3508 return;
3509
3510 ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
3511 }
3512
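/* Flush the dirty push-constant range as root constants. offset/end are
 * byte offsets into the push-constant block; the /4 converts them to the
 * 32-bit units root constants are expressed in.
 */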
3513 static void
3514 dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3515 {
3516 struct dzn_cmd_buffer_push_constant_state *state =
3517 bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
3518 &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
3519
3520 uint32_t offset = state->offset / 4;
3521 uint32_t end = ALIGN(state->end, 4) / 4;
3522 uint32_t count = end - offset;
3523
3524 if (!count)
3525 return;
3526
3527 uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
3528 uint32_t *vals = state->values + offset;
3529
3530 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3531 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3532 else
3533 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3534
3535 state->offset = 0;
3536 state->end = 0;
3537 }
3538
3539 static void
3540 dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
3541 {
3542 struct dzn_physical_device *pdev =
3543 container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
3544 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
3545 const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
3546 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3547 if (cmdbuf->cmdlist8 &&
3548 pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
3549 ID3D12GraphicsCommandList8_OMSetFrontAndBackStencilRef(cmdbuf->cmdlist8,
3550 cmdbuf->state.zsa.stencil_test.front.ref,
3551 cmdbuf->state.zsa.stencil_test.back.ref);
3552 } else {
3553 uint32_t ref =
3554 gfx->zsa.stencil_test.front.uses_ref ?
3555 cmdbuf->state.zsa.stencil_test.front.ref :
3556 cmdbuf->state.zsa.stencil_test.back.ref;
3557 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
3558 }
3559 }
3560 }
3561
3562 static void
3563 dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
3564 {
3565 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
3566 ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
3567 cmdbuf->state.blend.constants);
3568 }
3569
3570 static void
3571 dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
3572 {
3573 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
3574 ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
3575 cmdbuf->state.zsa.depth_bounds.min,
3576 cmdbuf->state.zsa.depth_bounds.max);
3577 }
3578 }
3579
3580 static void
3581 dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer *cmdbuf)
3582 {
3583 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BIAS) {
3584 assert(cmdbuf->cmdlist9);
3585 ID3D12GraphicsCommandList9_RSSetDepthBias(cmdbuf->cmdlist9,
3586 cmdbuf->state.pipeline_variant.depth_bias.constant_factor,
3587 cmdbuf->state.pipeline_variant.depth_bias.clamp,
3588 cmdbuf->state.pipeline_variant.depth_bias.slope_factor);
3589 }
3590 }
3591
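/* Build an index buffer that turns a triangle fan into a triangle list
 * for non-indexed draws. Fan vertex 0 is shared by every triangle, so
 * triangle t uses indices (t + 1, t + 2, 0); a 5-vertex fan becomes
 * (1,2,0) (2,3,0) (3,4,0). *vertex_count is rewritten to the resulting
 * index count, i.e. 3 indices per triangle.
 */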
3592 static VkResult
3593 dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
3594 {
3595 uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
3596 uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
3597
3598 *vertex_count = triangle_count * 3;
3599 if (!*vertex_count)
3600 return VK_SUCCESS;
3601
3602 ID3D12Resource *index_buf;
3603 uint64_t index_offset;
3604 VkResult result =
3605 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
3606 DZN_INTERNAL_BUF_UPLOAD,
3607 D3D12_RESOURCE_STATE_GENERIC_READ,
3608 index_size,
3609 &index_buf,
3610 &index_offset);
3611 if (result != VK_SUCCESS)
3612 return result;
3613
3614 void *cpu_ptr;
3615 ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);
3616 cpu_ptr = (uint8_t *)cpu_ptr + index_offset;
3617
3618 /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
3619 if (index_size == 2) {
3620 uint16_t *indices = (uint16_t *)cpu_ptr;
3621 for (uint32_t t = 0; t < triangle_count; t++) {
3622 indices[t * 3] = t + 1;
3623 indices[(t * 3) + 1] = t + 2;
3624 indices[(t * 3) + 2] = 0;
3625 }
3626 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
3627 } else {
3628 uint32_t *indices = (uint32_t *)cpu_ptr;
3629 for (uint32_t t = 0; t < triangle_count; t++) {
3630 indices[t * 3] = t + 1;
3631 indices[(t * 3) + 1] = t + 2;
3632 indices[(t * 3) + 2] = 0;
3633 }
3634 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
3635 }
3636
3637 cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
3638 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf) + index_offset;
3639 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3640 return VK_SUCCESS;
3641 }
3642
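/* Indexed counterpart of the triangle-fan lowering: a small compute
 * shader reads the application's index buffer and writes an expanded
 * 32-bit triangle-list index buffer, which then replaces the bound index
 * buffer view.
 */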
3643 static VkResult
3644 dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
3645 uint32_t *index_count,
3646 uint32_t *first_index)
3647 {
3648 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3649 uint32_t triangle_count = MAX2(*index_count, 2) - 2;
3650
3651 *index_count = triangle_count * 3;
3652 if (!*index_count)
3653 return VK_SUCCESS;
3654
3655 /* The new index buffer always uses 32-bit indices, which keeps the
3656 * index-rewriting compute shader simple. */
3657 ID3D12Resource *new_index_buf;
3658 VkResult result =
3659 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
3660 DZN_INTERNAL_BUF_DEFAULT,
3661 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3662 4,
3663 &new_index_buf,
3664 NULL);
3665 if (result != VK_SUCCESS)
3666 return result;
3667
3668 D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
3669 cmdbuf->state.ib.view.BufferLocation;
3670
3671 ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
3672 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3673 ASSERTED bool prim_restart =
3674 dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;
3675
3676 assert(!prim_restart);
3677
3678 enum dzn_index_type index_type =
3679 dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
3680 const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
3681 &device->triangle_fan[index_type];
3682
3683 struct dzn_triangle_fan_rewrite_index_params params = {
3684 .first_index = *first_index,
3685 };
3686
3687 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3688 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
3689 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
3690 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
3691 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
3692 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
3693 &params, 0);
3694 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
3695 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);
3696
3697 if (cmdbuf->enhanced_barriers) {
3698 dzn_cmd_buffer_buffer_barrier(cmdbuf, new_index_buf,
3699 D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_INDEX_INPUT,
3700 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDEX_BUFFER);
3701 } else {
3702 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
3703 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3704 D3D12_RESOURCE_STATE_INDEX_BUFFER,
3705 DZN_QUEUE_TRANSITION_FLUSH);
3706 }
3707
3708 /* Running our internal compute shader doesn't go through the driver
3709 * state tracking, but it does change the D3D12 state, so mark things
3710 * dirty where needed.
3711 */
3712 cmdbuf->state.pipeline = NULL;
3713 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
3714 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
3715 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3716 }
3717
3718 cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
3719 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
3720 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
3721 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3722 *first_index = 0;
3723 return VK_SUCCESS;
3724 }
3725
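/* Flush all graphics state that went dirty since the last draw. */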
3726 static void
3727 dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
3728 {
3729 if (indexed)
3730 dzn_cmd_buffer_update_ibview(cmdbuf);
3731
3732 dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
3733 dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
3734 dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
3735 dzn_cmd_buffer_update_viewports(cmdbuf);
3736 dzn_cmd_buffer_update_scissors(cmdbuf);
3737 dzn_cmd_buffer_update_vbviews(cmdbuf);
3738 dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
3739 dzn_cmd_buffer_update_zsa(cmdbuf);
3740 dzn_cmd_buffer_update_blend_constants(cmdbuf);
3741 dzn_cmd_buffer_update_depth_bounds(cmdbuf);
3742 dzn_cmd_buffer_update_depth_bias(cmdbuf);
3743
3744 /* Reset the dirty states */
3745 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3746 cmdbuf->state.dirty = 0;
3747 }
3748
3749 static uint32_t
3750 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
3751 {
3752 struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
3753 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3754
3755 if (!pipeline->ia.triangle_fan)
3756 return 0;
3757
3758 uint32_t max_triangles;
3759
3760 if (indexed) {
3761 uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
3762 uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
3763
3764 max_triangles = MAX2(max_indices, 2) - 2;
3765 } else {
3766 uint32_t max_vertex = 0;
3767 for (uint32_t i = 0; i < pipeline->vb.count; i++) {
3768 max_vertex =
3769 MAX2(max_vertex,
3770 cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
3771 }
3772
3773 max_triangles = MAX2(max_vertex, 2) - 2;
3774 }
3775
3776 return max_triangles * 3;
3777 }
3778
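/* Emulate indirect draws that D3D12's ExecuteIndirect can't consume
 * directly: a compute shader rewrites the application's argument buffer
 * into an exec buffer (with the first slot reserved for the indirect draw
 * count when one is provided), triangle fans get their index buffers
 * generated on the GPU, and the patched arguments are finally executed
 * once per view when multiview is emulated.
 */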
3779 static void
3780 dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
3781 ID3D12Resource *draw_buf,
3782 size_t draw_buf_offset,
3783 ID3D12Resource *count_buf,
3784 size_t count_buf_offset,
3785 uint32_t max_draw_count,
3786 uint32_t draw_buf_stride,
3787 bool indexed)
3788 {
3789 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3790 struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
3791 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3792 uint32_t min_draw_buf_stride =
3793 indexed ?
3794 sizeof(struct dzn_indirect_indexed_draw_params) :
3795 sizeof(struct dzn_indirect_draw_params);
3796 bool prim_restart =
3797 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;
3798
3799 draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
3800 assert(draw_buf_stride >= min_draw_buf_stride);
3801 assert((draw_buf_stride & 3) == 0);
3802
3803 uint32_t triangle_fan_index_buf_stride =
3804 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
3805 sizeof(uint32_t);
3806 uint32_t exec_buf_stride =
3807 triangle_fan_index_buf_stride > 0 ?
3808 sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
3809 sizeof(struct dzn_indirect_draw_exec_params);
3810 uint32_t triangle_fan_exec_buf_stride =
3811 sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
3812 uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
3813 uint32_t exec_buf_draw_offset = 0;
3814
3815 /* We reserve the first slot for the draw_count value when indirect
3816 * count is involved. */
3817 if (count_buf != NULL) {
3818 exec_buf_size += exec_buf_stride;
3819 exec_buf_draw_offset = exec_buf_stride;
3820 }
3821
3822 ID3D12Resource *exec_buf;
3823 VkResult result =
3824 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
3825 DZN_INTERNAL_BUF_DEFAULT,
3826 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3827 0,
3828 &exec_buf, NULL);
3829 if (result != VK_SUCCESS)
3830 return;
3831
3832 D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
3833 ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset;
3834 ID3D12Resource *triangle_fan_index_buf = NULL;
3835 ID3D12Resource *triangle_fan_exec_buf = NULL;
3836
3837 if (triangle_fan_index_buf_stride) {
3838 result =
3839 dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
3840 max_draw_count * triangle_fan_index_buf_stride,
3841 DZN_INTERNAL_BUF_DEFAULT,
3842 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3843 0,
3844 &triangle_fan_index_buf, NULL);
3845 if (result != VK_SUCCESS)
3846 return;
3847
3848 result =
3849 dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
3850 max_draw_count * triangle_fan_exec_buf_stride,
3851 DZN_INTERNAL_BUF_DEFAULT,
3852 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3853 0,
3854 &triangle_fan_exec_buf, NULL);
3855 if (result != VK_SUCCESS)
3856 return;
3857 }
3858
3859 struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
3860 .draw_buf_stride = draw_buf_stride,
3861 .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
3862 .triangle_fan_index_buf_start =
3863 triangle_fan_index_buf ?
3864 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
3865 .exec_buf_start =
3866 prim_restart ?
3867 ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
3868 };
3869 uint32_t params_size;
3870 if (triangle_fan_index_buf_stride > 0 && prim_restart)
3871 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
3872 else if (triangle_fan_index_buf_stride > 0)
3873 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
3874 else
3875 params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
3876
3877 enum dzn_indirect_draw_type draw_type;
3878
3879 if (indexed && triangle_fan_index_buf_stride > 0) {
3880 if (prim_restart && count_buf)
3881 draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
3882 else if (prim_restart && !count_buf)
3883 draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART;
3884 else if (!prim_restart && count_buf)
3885 draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
3886 else
3887 draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
3888 } else if (!indexed && triangle_fan_index_buf_stride > 0) {
3889 draw_type = count_buf ?
3890 DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN :
3891 DZN_INDIRECT_DRAW_TRIANGLE_FAN;
3892 } else if (indexed) {
3893 draw_type = count_buf ?
3894 DZN_INDIRECT_INDEXED_DRAW_COUNT :
3895 DZN_INDIRECT_INDEXED_DRAW;
3896 } else {
3897 draw_type = count_buf ? DZN_INDIRECT_DRAW_COUNT : DZN_INDIRECT_DRAW;
3898 }
3899
3900 struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
3901 uint32_t root_param_idx = 0;
3902
3903 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3904 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
3905 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
3906 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
3907 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
3908 params_size / 4, (const void *)&params, 0);
3909 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
3910 draw_buf_gpu);
3911 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
3912 ID3D12Resource_GetGPUVirtualAddress(exec_buf));
3913 if (count_buf) {
3914 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
3915 root_param_idx++,
3916 ID3D12Resource_GetGPUVirtualAddress(count_buf) +
3917 count_buf_offset);
3918 }
3919
3920 if (triangle_fan_exec_buf) {
3921 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
3922 root_param_idx++,
3923 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
3924 }
3925
3926 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);
3927
3928 D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
3929 D3D12_BUFFER_BARRIER buf_barriers[2];
3930 D3D12_BARRIER_GROUP enhanced_barriers = {
3931 .NumBarriers = 0,
3932 .Type = D3D12_BARRIER_TYPE_BUFFER,
3933 .pBufferBarriers = buf_barriers
3934 };
3935
3936 if (triangle_fan_exec_buf) {
3937 enum dzn_index_type index_type =
3938 indexed ?
3939 dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) :
3940 DZN_NO_INDEX;
3941 struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
3942 &device->triangle_fan[index_type];
3943
3944 struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };
3945
3946 assert(rewrite_index->root_sig);
3947 assert(rewrite_index->pipeline_state);
3948 assert(rewrite_index->cmd_sig);
3949
3950 if (cmdbuf->enhanced_barriers) {
3951 dzn_cmd_buffer_buffer_barrier(cmdbuf, triangle_fan_exec_buf,
3952 D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
3953 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
3954 } else {
3955 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
3956 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3957 D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
3958 DZN_QUEUE_TRANSITION_FLUSH);
3959 }
3960
3961 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3962 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
3963 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
3964 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
3965 root_param_idx = 0;
3966 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
3967 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
3968 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
3969 sizeof(rewrite_index_params) / 4,
3970 (const void *)&rewrite_index_params, 0);
3971
3972 if (indexed) {
3973 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
3974 root_param_idx++,
3975 cmdbuf->state.ib.view.BufferLocation);
3976 }
3977
3978 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
3979 max_draw_count, triangle_fan_exec_buf, 0,
3980 count_buf ? exec_buf : NULL, 0);
3981
3982 if (cmdbuf->enhanced_barriers) {
3983 buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
3984 .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
3985 .SyncAfter = D3D12_BARRIER_SYNC_INDEX_INPUT,
3986 .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
3987 .AccessAfter = D3D12_BARRIER_ACCESS_INDEX_BUFFER,
3988 .pResource = triangle_fan_index_buf,
3989 .Offset = 0, .Size = UINT64_MAX
3990 };
3991 } else {
3992 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
3993 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3994 D3D12_RESOURCE_STATE_INDEX_BUFFER,
3995 DZN_QUEUE_TRANSITION_FLUSH);
3996 }
3997
3998 /* After our triangle-fan lowering the draw is indexed */
3999 indexed = true;
4000 ib_view = cmdbuf->state.ib.view;
4001 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
4002 cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
4003 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
4004 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
4005 }
4006
4007 if (cmdbuf->enhanced_barriers) {
4008 buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
4009 .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
4010 .SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
4011 .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
4012 .AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT,
4013 .pResource = exec_buf,
4014 .Offset = 0, .Size = UINT64_MAX
4015 };
4016 ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &enhanced_barriers);
4017 } else {
4018 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
4019 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
4020 D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
4021 DZN_QUEUE_TRANSITION_FLUSH);
4022 }
4023
4024 /* We don't mess with the driver state when executing our internal
4025 * compute shader, but we still change the D3D12 state, so let's mark
4026 * things dirty if needed.
4027 */
4028 cmdbuf->state.pipeline = NULL;
4029 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
4030 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
4031 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4032 }
4033
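/* Pick the command signature matching the draw we're about to emit: the
 * triangle-fan lowering uses a dedicated signature (each exec-buffer entry
 * is expected to also carry the per-draw index-buffer view pointing into
 * the rewritten indices), otherwise it's simply a matter of indexed vs.
 * non-indexed draw arguments.
 */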
4034 enum dzn_indirect_draw_cmd_sig_type cmd_sig_type =
4035 triangle_fan_index_buf_stride > 0 ?
4036 DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG :
4037 indexed ?
4038 DZN_INDIRECT_INDEXED_DRAW_CMD_SIG :
4039 DZN_INDIRECT_DRAW_CMD_SIG;
4040 ID3D12CommandSignature *cmdsig =
4041 dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type);
4042
4043 if (!cmdsig) {
4044 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4045 return;
4046 }
4047
4048 cmdbuf->state.sysvals.gfx.first_vertex = 0;
4049 cmdbuf->state.sysvals.gfx.base_instance = 0;
4050 cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;
4051
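/* Multiview emulation: unless the pipeline uses native D3D12 view
 * instancing, replay the indirect draw once per active view, exposing the
 * current view through the view_index sysval.
 */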
4052 uint32_t view_mask = pipeline->multiview.native_view_instancing ?
4053 1 : pipeline->multiview.view_mask;
4054 u_foreach_bit(view, view_mask) {
4055 cmdbuf->state.sysvals.gfx.view_index = view;
4056 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4057 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
4058
4059 dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);
4060
4061 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
4062 max_draw_count,
4063 exec_buf, exec_buf_draw_offset,
4064 count_buf ? exec_buf : NULL, 0);
4065 }
4066
4067 /* Restore the old IB view if we modified it during the triangle fan lowering */
4068 if (ib_view.SizeInBytes) {
4069 cmdbuf->state.ib.view = ib_view;
4070 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
4071 }
4072 }
4073
4074 static void
4075 dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
4076 {
4077 dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
4078 dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
4079 dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
4080 dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
4081
4082 /* Reset the dirty states, preserving only the HEAPS bit */
4083 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
4084 }
4085
4086 VKAPI_ATTR void VKAPI_CALL
4087 dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
4088 const VkCopyBufferInfo2 *info)
4089 {
4090 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4091 VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
4092 VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
4093
4094 for (uint32_t i = 0; i < info->regionCount; i++) {
4095 const VkBufferCopy2 *region = info->pRegions + i;
4096
4097 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
4098 src_buffer->res, region->srcOffset,
4099 region->size);
4100 }
4101 }
4102
4103 VKAPI_ATTR void VKAPI_CALL
4104 dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
4105 const VkCopyBufferToImageInfo2 *info)
4106 {
4107 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4108
4109 for (uint32_t i = 0; i < info->regionCount; i++) {
4110 const VkBufferImageCopy2 *region = info->pRegions + i;
4111
4112 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4113 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4114 dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
4115 }
4116 }
4117 }
4118
4119 VKAPI_ATTR void VKAPI_CALL
4120 dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
4121 const VkCopyImageToBufferInfo2 *info)
4122 {
4123 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4124
4125 for (uint32_t i = 0; i < info->regionCount; i++) {
4126 const VkBufferImageCopy2 *region = info->pRegions + i;
4127
4128 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4129 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4130 dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
4131 }
4132 }
4133 }
4134
4135 VKAPI_ATTR void VKAPI_CALL
4136 dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
4137 const VkCopyImageInfo2 *info)
4138 {
4139 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4140 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4141 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
4142 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
4143 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
4144
4145 assert(src->vk.samples == dst->vk.samples);
4146
4147 bool requires_temp_res = false;
4148
4149 for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
4150 const VkImageCopy2 *region = &info->pRegions[i];
4151
4152 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4153 assert(aspect & region->dstSubresource.aspectMask);
4154
4155 if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
4156 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
4157 src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
4158 dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
4159 requires_temp_res = true;
4160 break;
4161 }
4162 }
4163 }
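/* D3D12 only allows direct texture-to-texture copies between formats of
 * the same copy-compatibility family. When the formats don't qualify and
 * neither image is linear (a linear image can presumably be addressed
 * through a buffer footprint instead), the copy is staged through a
 * temporary resource allocated below.
 */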
4164
4165 bool use_blit = false;
4166 if (src->vk.samples > 1) {
4167 use_blit = requires_temp_res;
4168
4169 for (uint32_t i = 0; i < info->regionCount; i++) {
4170 const VkImageCopy2 *region = info->pRegions + i;
4171 if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
4172 region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
4173 region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
4174 region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
4175 region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
4176 region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
4177 use_blit = true;
4178 }
4179 }
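/* Multisampled copies must cover whole subresources in D3D12, so any
 * offset/partial-extent region (or an MSAA copy that already needs a
 * temporary resource) is lowered to a blit instead.
 */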
4180
4181 if (use_blit) {
4182 /* This copy -> blit lowering doesn't work if vkCmdCopyImage[2]() is
4183 * issued on a transfer queue, but we don't have any better option
4184 * right now...
4185 */
4186 STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
4187
4188 VkBlitImageInfo2 blit_info = {
4189 .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
4190 .srcImage = info->srcImage,
4191 .srcImageLayout = info->srcImageLayout,
4192 .dstImage = info->dstImage,
4193 .dstImageLayout = info->dstImageLayout,
4194 .regionCount = info->regionCount,
4195 .pRegions = blit_regions,
4196 .filter = VK_FILTER_NEAREST,
4197 };
4198
4199 for (uint32_t r = 0; r < info->regionCount; r++) {
4200 blit_regions[r] = (VkImageBlit2) {
4201 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4202 .srcSubresource = info->pRegions[r].srcSubresource,
4203 .srcOffsets = {
4204 info->pRegions[r].srcOffset,
4205 info->pRegions[r].srcOffset,
4206 },
4207 .dstSubresource = info->pRegions[r].dstSubresource,
4208 .dstOffsets = {
4209 info->pRegions[r].dstOffset,
4210 info->pRegions[r].dstOffset,
4211 },
4212 };
4213
4214 blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
4215 blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
4216 blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
4217 blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
4218 blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
4219 blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
4220 }
4221
4222 dzn_CmdBlitImage2(commandBuffer, &blit_info);
4223
4224 STACK_ARRAY_FINISH(blit_regions);
4225 return;
4226 }
4227
4228 D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
4229 D3D12_RESOURCE_DESC tmp_desc = {
4230 .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
4231 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
4232 .DepthOrArraySize = 1,
4233 .MipLevels = 1,
4234 .Format = src->desc.Format,
4235 .SampleDesc = { .Count = 1, .Quality = 0 },
4236 .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
4237 .Flags = D3D12_RESOURCE_FLAG_NONE,
4238 };
4239
4240 if (requires_temp_res) {
4241 ID3D12Device4 *dev = device->dev;
4242 VkImageAspectFlags aspect = 0;
4243 uint64_t max_size = 0;
4244
4245 if (vk_format_has_depth(src->vk.format))
4246 aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4247 else if (vk_format_has_stencil(src->vk.format))
4248 aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
4249 else
4250 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
4251
4252 for (uint32_t i = 0; i < info->regionCount; i++) {
4253 const VkImageCopy2 *region = &info->pRegions[i];
4254 uint64_t region_size = 0;
4255
4256 tmp_desc.Format =
4257 dzn_image_get_dxgi_format(pdev, src->vk.format,
4258 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
4259 aspect);
4260 tmp_desc.Width = region->extent.width;
4261 tmp_desc.Height = region->extent.height;
4262
4263 ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
4264 0, 1, 0,
4265 NULL, NULL, NULL,
4266 &region_size);
4267 max_size = MAX2(max_size, region_size * region->extent.depth);
4268 }
4269
4270 VkResult result =
4271 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
4272 DZN_INTERNAL_BUF_DEFAULT,
4273 D3D12_RESOURCE_STATE_COPY_DEST,
4274 0,
4275 &tmp_loc.pResource, NULL);
4276 if (result != VK_SUCCESS)
4277 return;
4278
4279 tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
4280 }
4281
4282 for (uint32_t i = 0; i < info->regionCount; i++) {
4283 const VkImageCopy2 *region = &info->pRegions[i];
4284
4285 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4286 for (uint32_t l = 0; l < MAX2(region->srcSubresource.layerCount, region->dstSubresource.layerCount); l++)
4287 dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
4288 }
4289 }
4290 }
4291
4292 static VkResult
4293 dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer *cmdbuf,
4294 uint32_t num_view_slots, D3D12_FILTER sampler_filter,
4295 struct dzn_descriptor_heap **view_heap, uint32_t *view_heap_slot,
4296 struct dzn_descriptor_heap **sampler_heap, uint32_t *sampler_heap_slot)
4297 {
4298 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4299
4300 VkResult result =
4301 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
4302 num_view_slots, view_heap, view_heap_slot);
4303
4304 if (result != VK_SUCCESS) {
4305 vk_command_buffer_set_error(&cmdbuf->vk, result);
4306 return result;
4307 }
4308
4309 if (!device->support_static_samplers) {
4310 result =
4311 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->sampler_pool, device,
4312 1, sampler_heap, sampler_heap_slot);
4313
4314 if (result != VK_SUCCESS) {
4315 vk_command_buffer_set_error(&cmdbuf->vk, result);
4316 return result;
4317 }
4318
4319 D3D12_SAMPLER_DESC sampler_desc = {
4320 .Filter = sampler_filter,
4321 .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4322 .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4323 .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4324 .MipLODBias = 0,
4325 .MaxAnisotropy = 0,
4326 .MinLOD = 0,
4327 .MaxLOD = D3D12_FLOAT32_MAX,
4328 };
4329 ID3D12Device4_CreateSampler(device->dev, &sampler_desc,
4330 dzn_descriptor_heap_get_cpu_handle(*sampler_heap, *sampler_heap_slot));
4331 }
4332
4333 if (*view_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
4334 (*sampler_heap && *sampler_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])) {
4335 ID3D12DescriptorHeap * const heaps[] = { (*view_heap)->heap, *sampler_heap ? (*sampler_heap)->heap : NULL };
4336 cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = *view_heap;
4337 cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = *sampler_heap;
4338 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, *sampler_heap ? 2 : 1, heaps);
4339 }
4340
4341 return VK_SUCCESS;
4342 }
4343
4344 VKAPI_ATTR void VKAPI_CALL
4345 dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
4346 const VkBlitImageInfo2 *info)
4347 {
4348 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4349
4350 if (info->regionCount == 0)
4351 return;
4352
4353 uint32_t desc_count = 0;
4354 for (uint32_t r = 0; r < info->regionCount; r++)
4355 desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4356
4357 struct dzn_descriptor_heap *heap;
4358 uint32_t heap_slot;
4359 struct dzn_descriptor_heap *sampler_heap = NULL;
4360 uint32_t sampler_heap_slot = 0;
4361 VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4362 info->filter == VK_FILTER_LINEAR ?
4363 D3D12_FILTER_MIN_MAG_MIP_LINEAR :
4364 D3D12_FILTER_MIN_MAG_MIP_POINT,
4365 &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4366
4367 if (result != VK_SUCCESS)
4368 return;
4369
4370 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4371
4372 for (uint32_t r = 0; r < info->regionCount; r++)
4373 dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4374
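/* The blit path binds its own pipeline, viewports and scissors behind the
 * state-tracker's back, so invalidate the cached state to force a re-emit
 * on the next regular draw.
 */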
4375 cmdbuf->state.pipeline = NULL;
4376 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4377 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4378 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4379 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4380 }
4381 }
4382
4383 VKAPI_ATTR void VKAPI_CALL
4384 dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
4385 const VkResolveImageInfo2 *info)
4386 {
4387 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4388
4389 if (info->regionCount == 0)
4390 return;
4391
4392 uint32_t desc_count = 0;
4393 for (uint32_t r = 0; r < info->regionCount; r++)
4394 desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4395
4396 struct dzn_descriptor_heap *heap;
4397 uint32_t heap_slot;
4398 struct dzn_descriptor_heap *sampler_heap = NULL;
4399 uint32_t sampler_heap_slot = 0;
4400 VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4401 D3D12_FILTER_MIN_MAG_MIP_POINT,
4402 &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4403 if (result != VK_SUCCESS)
4404 return;
4405
4406 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4407
4408 for (uint32_t r = 0; r < info->regionCount; r++)
4409 dzn_cmd_buffer_resolve_region(cmdbuf, info, VK_RESOLVE_MODE_AVERAGE_BIT, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4410
4411 cmdbuf->state.pipeline = NULL;
4412 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4413 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4414 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4415 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4416 }
4417 }
4418
4419 VKAPI_ATTR void VKAPI_CALL
4420 dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
4421 VkImage image,
4422 VkImageLayout imageLayout,
4423 const VkClearColorValue *pColor,
4424 uint32_t rangeCount,
4425 const VkImageSubresourceRange *pRanges)
4426 {
4427 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4428 VK_FROM_HANDLE(dzn_image, img, image);
4429
4430 dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
4431 }
4432
4433 VKAPI_ATTR void VKAPI_CALL
4434 dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
4435 VkImage image,
4436 VkImageLayout imageLayout,
4437 const VkClearDepthStencilValue *pDepthStencil,
4438 uint32_t rangeCount,
4439 const VkImageSubresourceRange *pRanges)
4440 {
4441 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4442 VK_FROM_HANDLE(dzn_image, img, image);
4443
4444 dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
4445 }
4446
4447 VKAPI_ATTR void VKAPI_CALL
4448 dzn_CmdDispatchBase(VkCommandBuffer commandBuffer,
4449 uint32_t baseGroupX,
4450 uint32_t baseGroupY,
4451 uint32_t baseGroupZ,
4452 uint32_t groupCountX,
4453 uint32_t groupCountY,
4454 uint32_t groupCountZ)
4455 {
4456 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4457
4458 cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
4459 cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
4460 cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
4461 cmdbuf->state.sysvals.compute.base_group_x = baseGroupX;
4462 cmdbuf->state.sysvals.compute.base_group_y = baseGroupY;
4463 cmdbuf->state.sysvals.compute.base_group_z = baseGroupZ;
4464 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
4465 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
4466
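/* D3D12 has no DispatchBase() equivalent: the base workgroup coordinates
 * only reach the shader through the sysvals CBV, where the lowered compute
 * shader is expected to add them to the incoming group ID.
 */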
4467 dzn_cmd_buffer_prepare_dispatch(cmdbuf);
4468 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
4469 }
4470
4471 VKAPI_ATTR void VKAPI_CALL
4472 dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
4473 VkBuffer dstBuffer,
4474 VkDeviceSize dstOffset,
4475 VkDeviceSize size,
4476 uint32_t data)
4477 {
4478 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4479 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4480
4481 if (size == VK_WHOLE_SIZE)
4482 size = buf->size - dstOffset;
4483
4484 size &= ~3ULL;
4485
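/* D3D12 has no direct buffer-fill command (clearing through a UAV would
 * require descriptors we don't have here), so write the pattern to a
 * CPU-visible upload buffer and schedule a GPU copy into the destination.
 */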
4486 ID3D12Resource *src_res;
4487 uint64_t src_offset;
4488 VkResult result =
4489 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4490 DZN_INTERNAL_BUF_UPLOAD,
4491 D3D12_RESOURCE_STATE_GENERIC_READ,
4492 4,
4493 &src_res,
4494 &src_offset);
4495 if (result != VK_SUCCESS)
4496 return;
4497
4498 uint32_t *cpu_ptr;
4499 ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
4500 cpu_ptr += src_offset / sizeof(uint32_t);
4501 for (uint32_t i = 0; i < size / 4; i++)
4502 cpu_ptr[i] = data;
4503
4504 ID3D12Resource_Unmap(src_res, 0, NULL);
4505
4506 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4507 }
4508
4509 VKAPI_ATTR void VKAPI_CALL
4510 dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
4511 VkBuffer dstBuffer,
4512 VkDeviceSize dstOffset,
4513 VkDeviceSize size,
4514 const void *data)
4515 {
4516 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4517 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4518
4519 if (size == VK_WHOLE_SIZE)
4520 size = buf->size - dstOffset;
4521
4522 /*
4523 * The vkCmdFillBuffer() spec says: "size [...] must be either a multiple
4524 * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
4525 * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
4526 * is not a multiple of 4, then the nearest smaller multiple is used."
4527 */
4528 size &= ~3ULL;
4529
4530 ID3D12Resource *src_res;
4531 uint64_t src_offset;
4532 VkResult result =
4533 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4534 DZN_INTERNAL_BUF_UPLOAD,
4535 D3D12_RESOURCE_STATE_GENERIC_READ,
4536 4,
4537 &src_res, &src_offset);
4538 if (result != VK_SUCCESS)
4539 return;
4540
4541 void *cpu_ptr;
4542 ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
4543 memcpy((uint8_t *)cpu_ptr + src_offset, data, size);
4544 ID3D12Resource_Unmap(src_res, 0, NULL);
4545
4546 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4547 }
4548
4549 VKAPI_ATTR void VKAPI_CALL
4550 dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
4551 uint32_t attachmentCount,
4552 const VkClearAttachment *pAttachments,
4553 uint32_t rectCount,
4554 const VkClearRect *pRects)
4555 {
4556 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4557
4558 for (unsigned i = 0; i < attachmentCount; i++) {
4559 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
4560 struct dzn_image_view *view = NULL;
4561
4562 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
4563 assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
4564 view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
4565 layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
4566 } else {
4567 if (cmdbuf->state.render.attachments.depth.iview &&
4568 (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
4569 view = cmdbuf->state.render.attachments.depth.iview;
4570 layout = cmdbuf->state.render.attachments.depth.layout;
4571 }
4572
4573 if (cmdbuf->state.render.attachments.stencil.iview &&
4574 (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
4575 assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
4576 view = cmdbuf->state.render.attachments.stencil.iview;
4577 layout = cmdbuf->state.render.attachments.stencil.layout;
4578 }
4579 }
4580
4581 if (!view)
4582 continue;
4583
4584 for (uint32_t j = 0; j < rectCount; j++) {
4585 D3D12_RECT rect;
4586 dzn_translate_rect(&rect, &pRects[j].rect);
4587
4588 uint32_t view_mask = cmdbuf->state.multiview.view_mask;
4589 if (view_mask != 0) {
4590 u_foreach_bit(layer, view_mask) {
4591 dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
4592 &pAttachments[i].clearValue,
4593 pAttachments[i].aspectMask,
4594 pRects[j].baseArrayLayer + layer,
4595 pRects[j].layerCount,
4596 1, &rect);
4597 }
4598 } else {
4599 dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
4600 &pAttachments[i].clearValue,
4601 pAttachments[i].aspectMask,
4602 pRects[j].baseArrayLayer,
4603 pRects[j].layerCount,
4604 1, &rect);
4605 }
4606 }
4607 }
4608 }
4609
4610 static D3D12_RESOLVE_MODE
4611 dzn_get_resolve_mode(VkResolveModeFlags mode)
4612 {
4613 switch (mode) {
4614 case VK_RESOLVE_MODE_AVERAGE_BIT: return D3D12_RESOLVE_MODE_AVERAGE;
4615 case VK_RESOLVE_MODE_MAX_BIT: return D3D12_RESOLVE_MODE_MAX;
4616 case VK_RESOLVE_MODE_MIN_BIT: return D3D12_RESOLVE_MODE_MIN;
4617 /* TODO: D3D12 has no SAMPLE_ZERO resolve mode, MIN is only an approximation */
4618 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return D3D12_RESOLVE_MODE_MIN;
4619 default: return D3D12_RESOLVE_MODE_AVERAGE;
4620 }
4621 }
4622
4623 static void
4624 dzn_cmd_buffer_resolve_rendering_attachment_via_blit(struct dzn_cmd_buffer *cmdbuf,
4625 const struct dzn_rendering_attachment *att,
4626 VkImageAspectFlagBits aspect,
4627 const VkImageSubresourceRange *src_range,
4628 const VkImageSubresourceRange *dst_range)
4629 {
4630 uint32_t desc_count = util_bitcount(aspect) * src_range->levelCount * src_range->layerCount;
4631
4632 struct dzn_descriptor_heap *heap;
4633 uint32_t heap_slot;
4634 struct dzn_descriptor_heap *sampler_heap = NULL;
4635 uint32_t sampler_heap_slot = 0;
4636 VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4637 D3D12_FILTER_MIN_MAG_MIP_POINT,
4638 &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4639 if (result != VK_SUCCESS)
4640 return;
4641
4642 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4643
4644 VkImageResolve2 region = {
4645 .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
4646 .srcSubresource = {
4647 .aspectMask = aspect,
4648 .baseArrayLayer = src_range->baseArrayLayer,
4649 .layerCount = src_range->layerCount,
4650 },
4651 .dstSubresource = {
4652 .aspectMask = aspect,
4653 .baseArrayLayer = dst_range->baseArrayLayer,
4654 .layerCount = dst_range->layerCount,
4655 },
4656 };
4657 VkResolveImageInfo2 resolve_info = {
4658 .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2,
4659 .srcImage = vk_image_to_handle(att->iview->vk.image),
4660 .dstImage = vk_image_to_handle(att->resolve.iview->vk.image),
4661 .srcImageLayout = att->layout,
4662 .dstImageLayout = att->resolve.layout,
4663 .regionCount = 1,
4664 .pRegions = &region
4665 };
4666 for (uint32_t level = 0; level < src_range->levelCount; ++level) {
4667 region.srcSubresource.mipLevel = level + src_range->baseMipLevel;
4668 region.dstSubresource.mipLevel = level + dst_range->baseMipLevel;
4669 region.extent = (VkExtent3D){
4670 u_minify(att->iview->vk.image->extent.width, region.srcSubresource.mipLevel),
4671 u_minify(att->iview->vk.image->extent.height, region.srcSubresource.mipLevel),
4672 u_minify(att->iview->vk.image->extent.depth, region.srcSubresource.mipLevel),
4673 };
4674 dzn_cmd_buffer_resolve_region(cmdbuf, &resolve_info, att->resolve.mode, heap, &heap_slot, sampler_heap, sampler_heap_slot, 0);
4675 }
4676
4677 cmdbuf->state.pipeline = NULL;
4678 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4679 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4680 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4681 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4682 }
4683 }
4684
4685 static void
4686 dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
4687 const struct dzn_rendering_attachment *att,
4688 VkImageAspectFlagBits aspect,
4689 bool force_blit_resolve)
4690 {
4691 struct dzn_image_view *src = att->iview;
4692 struct dzn_image_view *dst = att->resolve.iview;
4693
4694 if (!src || !dst || att->resolve.mode == VK_RESOLVE_MODE_NONE)
4695 return;
4696
4697 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4698 struct dzn_physical_device *pdev =
4699 container_of(device->vk.physical, struct dzn_physical_device, vk);
4700
4701 struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
4702 struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);
4703
4704 VkImageSubresourceRange src_range = {
4705 .aspectMask = (VkImageAspectFlags)aspect,
4706 .baseMipLevel = src->vk.base_mip_level,
4707 .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
4708 .baseArrayLayer = src->vk.base_array_layer,
4709 .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
4710 };
4711 if (src_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
4712 src_range.baseArrayLayer = 0;
4713 src_range.layerCount = 1;
4714 }
4715
4716 VkImageSubresourceRange dst_range = {
4717 .aspectMask = (VkImageAspectFlags)aspect,
4718 .baseMipLevel = dst->vk.base_mip_level,
4719 .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
4720 .baseArrayLayer = dst->vk.base_array_layer,
4721 .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
4722 };
4723 if (dst_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
4724 dst_range.baseArrayLayer = 0;
4725 dst_range.layerCount = 1;
4726 }
4727
4728 if (force_blit_resolve ||
4729 /* Resolve modes other than average are poorly tested / buggy */
4730 att->resolve.mode != VK_RESOLVE_MODE_AVERAGE_BIT ||
4731 /* D3D resolve API can't go from (e.g.) D32S8X24 to D32 */
4732 src->vk.view_format != dst->vk.view_format) {
4733 dzn_cmd_buffer_resolve_rendering_attachment_via_blit(cmdbuf, att, aspect, &src_range, &dst_range);
4734 return;
4735 }
4736
4737 VkImageLayout src_layout = att->layout;
4738 VkImageLayout dst_layout = att->resolve.layout;
4739
4740 D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect, cmdbuf->type);
4741 D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect, cmdbuf->type);
4742 D3D12_BARRIER_LAYOUT src_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
4743 src_needed_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ;
4744 D3D12_BARRIER_LAYOUT dst_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
4745 dst_needed_layout = D3D12_BARRIER_LAYOUT_RESOLVE_DEST;
4746 if (cmdbuf->enhanced_barriers) {
4747 src_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, src_img,
4748 src_layout, src_needed_layout,
4749 &src_range);
4750 dst_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst_img,
4751 dst_layout, dst_needed_layout,
4752 &dst_range);
4753 } else {
4754 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
4755 src_state,
4756 D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
4757 DZN_QUEUE_TRANSITION_FLUSH);
4758 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
4759 dst_state,
4760 D3D12_RESOURCE_STATE_RESOLVE_DEST,
4761 DZN_QUEUE_TRANSITION_FLUSH);
4762 }
4763
4764 for (uint32_t level = 0; level < src_range.levelCount; level++) {
4765 for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
4766 uint32_t src_subres =
4767 dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
4768 uint32_t dst_subres =
4769 dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);
4770
4771 DXGI_FORMAT format =
4772 dzn_image_get_dxgi_format(pdev, dst->vk.format,
4773 dst->vk.usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
4774 aspect);
4775
4776 if (cmdbuf->cmdlist8 &&
4777 pdev->options2.ProgrammableSamplePositionsTier > D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED) {
4778 ID3D12GraphicsCommandList8_ResolveSubresourceRegion(cmdbuf->cmdlist8,
4779 dst_img->res, dst_subres,
4780 0, 0,
4781 src_img->res, src_subres,
4782 NULL,
4783 format,
4784 dzn_get_resolve_mode(att->resolve.mode));
4785 } else {
4786 ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
4787 dst_img->res, dst_subres,
4788 src_img->res, src_subres,
4789 format);
4790 }
4791 }
4792 }
4793
4794 if (cmdbuf->enhanced_barriers) {
4795 dzn_cmd_buffer_restore_layout(cmdbuf, src_img,
4796 D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_SOURCE,
4797 src_needed_layout, src_restore_layout,
4798 &src_range);
4799 dzn_cmd_buffer_restore_layout(cmdbuf, dst_img,
4800 D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_DEST,
4801 dst_needed_layout, dst_restore_layout,
4802 &dst_range);
4803 } else {
4804 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
4805 D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
4806 src_state,
4807 DZN_QUEUE_TRANSITION_FLUSH);
4808 dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
4809 D3D12_RESOURCE_STATE_RESOLVE_DEST,
4810 dst_state,
4811 DZN_QUEUE_TRANSITION_FLUSH);
4812 }
4813 }
4814
4815 static void
4816 dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
4817 const VkRenderingAttachmentInfo *att,
4818 VkImageAspectFlagBits aspect)
4819 {
4820 const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
4821 vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
4822 VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
4823
4824 if (!initial_layout || !iview)
4825 return;
4826
4827 struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
4828 VkImageSubresourceRange range = {
4829 .aspectMask = aspect,
4830 .baseMipLevel = iview->vk.base_mip_level,
4831 .levelCount = iview->vk.level_count,
4832 .baseArrayLayer = iview->vk.base_array_layer,
4833 .layerCount = iview->vk.layer_count,
4834 };
4835 if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
4836 range.baseArrayLayer = 0;
4837 range.layerCount = 1;
4838 }
4839
4840 if (cmdbuf->enhanced_barriers) {
4841 D3D12_BARRIER_SYNC sync_before = D3D12_BARRIER_SYNC_ALL;
4842 D3D12_BARRIER_ACCESS access_before = D3D12_BARRIER_ACCESS_COMMON;
4843 if (initial_layout->initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
4844 sync_before = D3D12_BARRIER_SYNC_NONE;
4845 access_before = D3D12_BARRIER_ACCESS_NO_ACCESS;
4846 }
4847
4848 D3D12_BARRIER_LAYOUT layout_before = dzn_vk_layout_to_d3d_layout(initial_layout->initialLayout, cmdbuf->type, aspect);
4849 D3D12_BARRIER_LAYOUT layout_after = dzn_vk_layout_to_d3d_layout(att->imageLayout, cmdbuf->type, aspect);
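/* Simultaneous-access textures don't go through layout transitions in the
 * enhanced-barriers model, which is presumably why both layouts are forced
 * to D3D12_BARRIER_LAYOUT_UNDEFINED below.
 */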
4850 if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) {
4851 layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED;
4852 layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED;
4853 }
4854
4855 dzn_cmd_buffer_image_barrier(cmdbuf, image,
4856 sync_before, D3D12_BARRIER_SYNC_DRAW,
4857 access_before, D3D12_BARRIER_ACCESS_COMMON,
4858 layout_before,
4859 layout_after,
4860 &range);
4861 } else {
4862 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
4863 initial_layout->initialLayout,
4864 att->imageLayout,
4865 DZN_QUEUE_TRANSITION_FLUSH);
4866 }
4867 }
4868
4869 VKAPI_ATTR void VKAPI_CALL
4870 dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
4871 const VkRenderingInfo *pRenderingInfo)
4872 {
4873 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4874
4875 D3D12_RECT new_render_area = {
4876 .left = pRenderingInfo->renderArea.offset.x,
4877 .top = pRenderingInfo->renderArea.offset.y,
4878 .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
4879 .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
4880 };
4881
4882 // The render area has an impact on the scissor state.
4883 if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
4884 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
4885 cmdbuf->state.render.area = new_render_area;
4886 }
4887
4888 cmdbuf->state.render.flags = pRenderingInfo->flags;
4889 cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
4890 cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;
4891
4892 D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
4893 D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
4894
4895 cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
4896 for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
4897 const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
4898 VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
4899
4900 cmdbuf->state.render.attachments.colors[i].iview = iview;
4901 cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
4902 cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
4903 cmdbuf->state.render.attachments.colors[i].resolve.iview =
4904 dzn_image_view_from_handle(att->resolveImageView);
4905 cmdbuf->state.render.attachments.colors[i].resolve.layout =
4906 att->resolveImageLayout;
4907 cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;
4908
4909 if (!iview) {
4910 rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
4911 continue;
4912 }
4913
4914 struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
4915 rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
4916 dzn_rendering_attachment_initial_transition(cmdbuf, att,
4917 VK_IMAGE_ASPECT_COLOR_BIT);
4918 }
4919
4920 if (pRenderingInfo->pDepthAttachment) {
4921 const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
4922
4923 cmdbuf->state.render.attachments.depth.iview =
4924 dzn_image_view_from_handle(att->imageView);
4925 cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
4926 cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
4927 cmdbuf->state.render.attachments.depth.resolve.iview =
4928 dzn_image_view_from_handle(att->resolveImageView);
4929 cmdbuf->state.render.attachments.depth.resolve.layout =
4930 att->resolveImageLayout;
4931 cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
4932 dzn_rendering_attachment_initial_transition(cmdbuf, att,
4933 VK_IMAGE_ASPECT_DEPTH_BIT);
4934 }
4935
4936 if (pRenderingInfo->pStencilAttachment) {
4937 const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
4938
4939 cmdbuf->state.render.attachments.stencil.iview =
4940 dzn_image_view_from_handle(att->imageView);
4941 cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
4942 cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
4943 cmdbuf->state.render.attachments.stencil.resolve.iview =
4944 dzn_image_view_from_handle(att->resolveImageView);
4945 cmdbuf->state.render.attachments.stencil.resolve.layout =
4946 att->resolveImageLayout;
4947 cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
4948 dzn_rendering_attachment_initial_transition(cmdbuf, att,
4949 VK_IMAGE_ASPECT_STENCIL_BIT);
4950 }
4951
4952 if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
4953 struct dzn_image_view *z_iview =
4954 pRenderingInfo->pDepthAttachment ?
4955 dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
4956 NULL;
4957 struct dzn_image_view *s_iview =
4958 pRenderingInfo->pStencilAttachment ?
4959 dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
4960 NULL;
4961 struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
4962 assert(!z_iview || !s_iview || z_iview == s_iview);
4963
4964 if (iview) {
4965 struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
4966
4967 zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
4968 }
4969 }
4970
4971 ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
4972 pRenderingInfo->colorAttachmentCount,
4973 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
4974 false, zs_handle.ptr ? &zs_handle : NULL);
4975
4976 for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
4977 const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
4978 VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
4979
4980 if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
4981 !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
4982 if (pRenderingInfo->viewMask != 0) {
4983 u_foreach_bit(layer, pRenderingInfo->viewMask) {
4984 dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
4985 &att->clearValue,
4986 VK_IMAGE_ASPECT_COLOR_BIT, layer,
4987 1, 1, &cmdbuf->state.render.area);
4988 }
4989 } else {
4990 dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
4991 &att->clearValue,
4992 VK_IMAGE_ASPECT_COLOR_BIT, 0,
4993 pRenderingInfo->layerCount, 1,
4994 &cmdbuf->state.render.area);
4995 }
4996 }
4997 }
4998
4999 if ((pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) &&
5000 !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
5001 const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
5002 const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
5003 struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
5004 struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
5005 struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
5006 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
5007
5008 assert(!z_iview || !s_iview || z_iview == s_iview);
5009
5010 VkImageAspectFlags aspects = 0;
5011 VkClearValue clear_val;
5012
5013 if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
5014 aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
5015 clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
5016 layout = z_att->imageLayout;
5017 }
5018
5019 if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
5020 aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
5021 clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
5022 layout = s_att->imageLayout;
5023 }
5024
5025 if (aspects != 0) {
5026 if (pRenderingInfo->viewMask != 0) {
5027 u_foreach_bit(layer, pRenderingInfo->viewMask) {
5028 dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
5029 &clear_val, aspects, layer,
5030 1, 1, &cmdbuf->state.render.area);
5031 }
5032 } else {
5033 dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
5034 &clear_val, aspects, 0,
5035 VK_REMAINING_ARRAY_LAYERS, 1,
5036 &cmdbuf->state.render.area);
5037 }
5038 }
5039 }
5040
5041 cmdbuf->state.multiview.num_views = MAX2(util_bitcount(pRenderingInfo->viewMask), 1);
5042 cmdbuf->state.multiview.view_mask = MAX2(pRenderingInfo->viewMask, 1);
5043 }
5044
5045 VKAPI_ATTR void VKAPI_CALL
5046 dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
5047 {
5048 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5049
5050 if (!(cmdbuf->state.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
5051 for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
5052 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5053 &cmdbuf->state.render.attachments.colors[i],
5054 VK_IMAGE_ASPECT_COLOR_BIT, false);
5055 }
5056
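/* When depth and stencil request different resolve modes, force both
 * aspects through the blit-based resolve, presumably because the native
 * resolve path can't apply a different mode to each plane.
 */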
5057 bool separate_stencil_resolve =
5058 cmdbuf->state.render.attachments.depth.resolve.mode !=
5059 cmdbuf->state.render.attachments.stencil.resolve.mode;
5060 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5061 &cmdbuf->state.render.attachments.depth,
5062 VK_IMAGE_ASPECT_DEPTH_BIT,
5063 separate_stencil_resolve);
5064 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5065 &cmdbuf->state.render.attachments.stencil,
5066 VK_IMAGE_ASPECT_STENCIL_BIT,
5067 separate_stencil_resolve);
5068 }
5069
5070 memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
5071 }
5072
5073 VKAPI_ATTR void VKAPI_CALL
5074 dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
5075 VkPipelineBindPoint pipelineBindPoint,
5076 VkPipeline pipe)
5077 {
5078 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5079 VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
5080
5081 cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
5082 cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5083 if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
5084 const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;
5085
5086 if (!gfx->vp.dynamic) {
5087 memcpy(cmdbuf->state.viewports, gfx->vp.desc,
5088 gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
5089 cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
5090 cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
5091 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
5092 cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5093 }
5094
5095 if (!gfx->scissor.dynamic) {
5096 memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
5097 gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
5098 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
5099 }
5100
5101 if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
5102 cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
5103 cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
5104 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
5105 }
5106
5107 if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
5108 cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
5109 cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
5110 cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
5111 }
5112
5113 if (!gfx->blend.dynamic_constants) {
5114 memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
5115 sizeof(cmdbuf->state.blend.constants));
5116 cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
5117 }
5118
5119 for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
5120 cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
5121
5122 if (gfx->vb.count > 0)
5123 BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
5124 }
5125 }
5126
5127 VKAPI_ATTR void VKAPI_CALL
5128 dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
5129 VkPipelineBindPoint pipelineBindPoint,
5130 VkPipelineLayout layout,
5131 uint32_t firstSet,
5132 uint32_t descriptorSetCount,
5133 const VkDescriptorSet *pDescriptorSets,
5134 uint32_t dynamicOffsetCount,
5135 const uint32_t *pDynamicOffsets)
5136 {
5137 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5138 VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
5139
5140 struct dzn_descriptor_state *desc_state =
5141 &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
5142 uint32_t dirty = 0;
5143
5144 for (uint32_t i = 0; i < descriptorSetCount; i++) {
5145 uint32_t idx = firstSet + i;
5146 VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
5147
5148 if (desc_state->sets[idx].set != set) {
5149 desc_state->sets[idx].set = set;
5150 dirty |= DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << idx;
5151 }
5152
5153 uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
5154 if (dynamic_buffer_count) {
5155 assert(dynamicOffsetCount >= dynamic_buffer_count);
5156
5157 for (uint32_t j = 0; j < dynamic_buffer_count; j++)
5158 desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
5159
5160 dynamicOffsetCount -= dynamic_buffer_count;
5161 pDynamicOffsets += dynamic_buffer_count;
5162 dirty |= DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS;
5163 }
5164 }
5165
5166 cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
5167 }
5168
5169 VKAPI_ATTR void VKAPI_CALL
5170 dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
5171 uint32_t firstViewport,
5172 uint32_t viewportCount,
5173 const VkViewport *pViewports)
5174 {
5175 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5176
5177 STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
5178
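/* D3D12 viewports can't have a negative height or an inverted depth range,
 * so the required Y/Z flips are recorded in a per-viewport bitmask (Y in
 * the low bits, Z shifted by DXIL_SPIRV_Z_FLIP_SHIFT) that the lowered
 * vertex shader consumes through the sysvals CBV.
 */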
5179 for (uint32_t i = 0; i < viewportCount; i++) {
5180 uint32_t vp = i + firstViewport;
5181
5182 dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
5183
5184 if (pViewports[i].minDepth > pViewports[i].maxDepth)
5185 cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
5186 else
5187 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
5188
5189 if (pViewports[i].height > 0)
5190 cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
5191 else
5192 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
5193 }
5194
5195 cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
5196 cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
5197
5198 if (viewportCount) {
5199 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
5200 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5201 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5202 }
5203 }
5204
5205 VKAPI_ATTR void VKAPI_CALL
5206 dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
5207 uint32_t firstScissor,
5208 uint32_t scissorCount,
5209 const VkRect2D *pScissors)
5210 {
5211 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5212
5213 for (uint32_t i = 0; i < scissorCount; i++)
5214 dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
5215
5216 if (scissorCount)
5217 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
5218 }
5219
5220 VKAPI_ATTR void VKAPI_CALL
5221 dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
5222 VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
5223 const void *pValues)
5224 {
5225 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5226 struct dzn_cmd_buffer_push_constant_state *states[2];
5227 uint32_t num_states = 0;
5228
5229 if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
5230 states[num_states++] = &cmdbuf->state.push_constant.gfx;
5231
5232 if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
5233 states[num_states++] = &cmdbuf->state.push_constant.compute;
5234
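/* Values are only staged in CPU-side state here; the accumulated
 * [offset, end) window is flushed as root constants by
 * dzn_cmd_buffer_update_push_constants() when the next draw/dispatch is
 * prepared.
 */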
5235 for (uint32_t i = 0; i < num_states; i++) {
5236 memcpy(((char *)states[i]->values) + offset, pValues, size);
5237 states[i]->offset =
5238 states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
5239 states[i]->end = MAX2(states[i]->end, offset + size);
5240 }
5241 }
5242
5243 VKAPI_ATTR void VKAPI_CALL
5244 dzn_CmdDraw(VkCommandBuffer commandBuffer,
5245 uint32_t vertexCount,
5246 uint32_t instanceCount,
5247 uint32_t firstVertex,
5248 uint32_t firstInstance)
5249 {
5250 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5251
5252 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
5253 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
5254
5255 cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
5256 cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
5257
5258 uint32_t view_mask = pipeline->multiview.native_view_instancing ?
5259 1 : pipeline->multiview.view_mask;
5260
5261 if (pipeline->ia.triangle_fan) {
5262 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
5263
5264 VkResult result =
5265 dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
5266 if (result != VK_SUCCESS || !vertexCount)
5267 return;
5268
5269 cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
5270 u_foreach_bit(view, view_mask) {
5271 cmdbuf->state.sysvals.gfx.view_index = view;
5272 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5273 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5274 dzn_cmd_buffer_prepare_draw(cmdbuf, true);
5275 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
5276 firstVertex, firstInstance);
5277 }
5278
5279 /* Restore the IB view if we modified it when lowering triangle fans. */
5280 if (ib_view.SizeInBytes > 0) {
5281 cmdbuf->state.ib.view = ib_view;
5282 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
5283 }
5284 } else {
5285 cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
5286 u_foreach_bit(view, view_mask) {
5287 cmdbuf->state.sysvals.gfx.view_index = view;
5288 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5289 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5290 dzn_cmd_buffer_prepare_draw(cmdbuf, false);
5291 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
5292 firstVertex, firstInstance);
5293 }
5294 }
5295 }
5296
5297 VKAPI_ATTR void VKAPI_CALL
5298 dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
5299 uint32_t indexCount,
5300 uint32_t instanceCount,
5301 uint32_t firstIndex,
5302 int32_t vertexOffset,
5303 uint32_t firstInstance)
5304 {
5305 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5306
5307 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
5308 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
5309
5310 if (pipeline->ia.triangle_fan &&
5311 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
5312 /* The indexed+primitive-restart+triangle-fan combination is a mess,
5313 * since we have to walk the index buffer, skip entries with the
5314 * special 0xffff/0xffffffff values, and push triangle list indices
5315 * for the remaining values. All of this has an impact on the index
5316 * count passed to the draw call, which forces us to use the indirect
5317 * path.
5318 */
5319 struct dzn_indirect_indexed_draw_params params = {
5320 .index_count = indexCount,
5321 .instance_count = instanceCount,
5322 .first_index = firstIndex,
5323 .vertex_offset = vertexOffset,
5324 .first_instance = firstInstance,
5325 };
5326
5327 ID3D12Resource *draw_buf;
5328 uint64_t offset;
5329 VkResult result =
5330 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
5331 DZN_INTERNAL_BUF_UPLOAD,
5332 D3D12_RESOURCE_STATE_GENERIC_READ,
5333 4,
5334 &draw_buf, &offset);
5335 if (result != VK_SUCCESS)
5336 return;
5337
5338 void *cpu_ptr;
5339 ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
5340 memcpy((uint8_t *)cpu_ptr + offset, &params, sizeof(params));
5341
5342 ID3D12Resource_Unmap(draw_buf, 0, NULL);
5343
5344 dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, offset, NULL, 0, 1, sizeof(params), true);
5345 return;
5346 }
5347
5348 cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
5349 cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
5350 cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
5351
5352 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
5353
5354 if (pipeline->ia.triangle_fan) {
5355 VkResult result =
5356 dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
5357 if (result != VK_SUCCESS || !indexCount)
5358 return;
5359 }
5360
5361 uint32_t view_mask = pipeline->multiview.native_view_instancing ?
5362 1 : pipeline->multiview.view_mask;
5363 u_foreach_bit(view, view_mask) {
5364 cmdbuf->state.sysvals.gfx.view_index = view;
5365 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5366 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5367
5368 dzn_cmd_buffer_prepare_draw(cmdbuf, true);
5369 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
5370 vertexOffset, firstInstance);
5371 }
5372
5373 /* Restore the IB view if we modified it when lowering triangle fans. */
5374 if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
5375 cmdbuf->state.ib.view = ib_view;
5376 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
5377 }
5378 }
5379
5380 VKAPI_ATTR void VKAPI_CALL
5381 dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
5382 VkBuffer buffer,
5383 VkDeviceSize offset,
5384 uint32_t drawCount,
5385 uint32_t stride)
5386 {
5387 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5388 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5389
5390 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
5391 }
5392
5393 VKAPI_ATTR void VKAPI_CALL
5394 dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
5395 VkBuffer buffer,
5396 VkDeviceSize offset,
5397 uint32_t drawCount,
5398 uint32_t stride)
5399 {
5400 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5401 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5402
5403 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
5404 }
5405
5406 VKAPI_ATTR void VKAPI_CALL
5407 dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
5408 VkBuffer buffer,
5409 VkDeviceSize offset,
5410 VkBuffer countBuffer,
5411 VkDeviceSize countBufferOffset,
5412 uint32_t maxDrawCount,
5413 uint32_t stride)
5414 {
5415 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5416 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5417 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5418
5419 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5420 count_buf->res, countBufferOffset,
5421 maxDrawCount, stride, false);
5422 }
5423
5424 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5425 dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
5426 VkBuffer buffer,
5427 VkDeviceSize offset,
5428 VkBuffer countBuffer,
5429 VkDeviceSize countBufferOffset,
5430 uint32_t maxDrawCount,
5431 uint32_t stride)
5432 {
5433 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5434 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5435 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5436
5437 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5438 count_buf->res, countBufferOffset,
5439 maxDrawCount, stride, true);
5440 }
5441
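/* Vertex-buffer bindings are cached as D3D12_VERTEX_BUFFER_VIEWs and flushed
 * at draw time. StrideInBytes is intentionally left out here: Vulkan carries
 * the stride in the pipeline state, while D3D12 carries it in the view, so
 * it gets patched when the draw is prepared.
 */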
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
                         uint32_t firstBinding,
                         uint32_t bindingCount,
                         const VkBuffer *pBuffers,
                         const VkDeviceSize *pOffsets)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (!bindingCount)
      return;

   D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;

   for (uint32_t i = 0; i < bindingCount; i++) {
      VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);

      vbviews[firstBinding + i].BufferLocation = buf->gpuva + pOffsets[i];
      vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
   }

   BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
                    firstBinding + bindingCount - 1);
}

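/* Binding an index buffer also selects the primitive-restart cut value
 * matching the index width. If the bound pipeline bakes the strip-cut value
 * into its PSO descriptor template, a type change may require switching to
 * another pipeline variant, hence the PIPELINE dirty flag below.
 */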
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
                       VkBuffer buffer,
                       VkDeviceSize offset,
                       VkIndexType indexType)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_buffer, buf, buffer);

   cmdbuf->state.ib.view.BufferLocation = buf->gpuva + offset;
   cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
   switch (indexType) {
   case VK_INDEX_TYPE_UINT16:
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
      cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
      break;
   case VK_INDEX_TYPE_UINT32:
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
      cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
      break;
   default: unreachable("Invalid index type");
   }

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;

   const struct dzn_graphics_pipeline *pipeline =
      (const struct dzn_graphics_pipeline *)cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   if (pipeline &&
       dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

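/* Events are not translated into D3D12 objects at record time: we only track
 * the last set/reset state in a per-command-buffer hash table, and the
 * actual synchronization is resolved when the command buffer is submitted.
 */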
VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,
                   VkEvent event,
                   VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_event, evt, event);

   if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,
                 VkEvent event,
                 const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_event, evt, event);

   if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer,
                   uint32_t eventCount,
                   const VkEvent *pEvents,
                   const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* Intra-command-list waits are handled by this pipeline flush, which is
    * overkill, but that's the best we can do with the standard D3D12 barrier
    * API.
    *
    * Inter-command-list waits are taken care of by the serialization done at
    * the ExecuteCommandLists() level:
    * "Calling ExecuteCommandLists twice in succession (from the same thread,
    * or different threads) guarantees that the first workload (A) finishes
    * before the second workload (B)"
    *
    * HOST -> DEVICE signaling is ignored and we assume events are always
    * signaled when we reach the vkCmdWaitEvents() point:
    * "Command buffers in the submission can include vkCmdWaitEvents commands
    * that wait on events that will not be signaled by earlier commands in the
    * queue. Such events must be signaled by the application using vkSetEvent,
    * and the vkCmdWaitEvents commands that wait upon them must not be inside
    * a render pass instance.
    * The event must be set before the vkCmdWaitEvents command is executed."
    */
   bool flush_pipeline = false;

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(dzn_event, event, pEvents[i]);

      struct hash_entry *he =
         _mesa_hash_table_search(cmdbuf->events.ht, event);
      if (he) {
         enum dzn_event_state state = (uintptr_t)he->data;
         assert(state != DZN_EVENT_STATE_RESET);
         flush_pipeline = state == DZN_EVENT_STATE_SET;
      }
   }

   if (flush_pipeline) {
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_global_barrier(cmdbuf,
                                       D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
                                       D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON);
      } else {
         D3D12_RESOURCE_BARRIER barrier = {
            .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
            .UAV = { .pResource = NULL },
         };

         ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
      }
   }
   cmdbuf->vk.base.device->dispatch_table.CmdPipelineBarrier2(
      vk_command_buffer_to_handle(&cmdbuf->vk),
      pDependencyInfo);
}

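/* Query implementation note: queries live in a D3D12 query heap and are
 * resolved into an internal "collect" buffer. When multiview is emulated,
 * each view consumes one query slot; only the first slot is begun/ended on
 * the command list, and the extra slots are flagged to resolve as zero.
 */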
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
                  VkQueryPool queryPool,
                  uint32_t query,
                  VkQueryControlFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
      qpool->queries[query + i].type = dzn_query_pool_get_query_type(qpool, flags);

   ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, query, cmdbuf->state.multiview.num_views);
   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, query, cmdbuf->state.multiview.num_views);
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
                VkQueryPool queryPool,
                uint32_t query)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
   if (cmdbuf->state.multiview.num_views > 1)
      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
}

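/* The stage mask is ignored: D3D12 timestamps have end-of-pipe semantics,
 * so we conservatively flush all outstanding work before recording the
 * timestamp query.
 */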
VKAPI_ATTR void VKAPI_CALL
dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlags2 stage,
                       VkQueryPool queryPool,
                       uint32_t query)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   /* Execution barrier so the timestamp gets written after the pipeline flush. */
   D3D12_RESOURCE_BARRIER barrier = {
      .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
      .UAV = { .pResource = NULL },
   };

   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);

   for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
      qpool->queries[query + i].type = D3D12_QUERY_TYPE_TIMESTAMP;
   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
   if (cmdbuf->state.multiview.num_views > 1)
      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
}

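/* Query-pool resets are turned into GPU copies: the availability and result
 * slots in the collect buffer are overwritten with zeros sourced from a
 * pre-initialized reference buffer (device->queries.refs), one
 * DZN_QUERY_REFS_SECTION_SIZE chunk at a time.
 */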
VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                      VkQueryPool queryPool,
                      uint32_t firstQuery,
                      uint32_t queryCount)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);

   if (!state)
      return;

   uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);

   for (uint32_t q = 0; q < queryCount; q += q_step) {
      uint32_t q_count = MIN2(queryCount - q, q_step);

      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
                                                  dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
                                                  device->queries.refs,
                                                  DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
                                                  q_count * sizeof(uint64_t));
   }

   q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;

   for (uint32_t q = 0; q < queryCount; q += q_step) {
      uint32_t q_count = MIN2(queryCount - q, q_step);

      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
                                                  dzn_query_pool_get_result_offset(qpool, firstQuery + q),
                                                  device->queries.refs,
                                                  DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
                                                  q_count * qpool->query_size);
   }

   dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                            VkQueryPool queryPool,
                            uint32_t firstQuery,
                            uint32_t queryCount,
                            VkBuffer dstBuffer,
                            VkDeviceSize dstOffset,
                            VkDeviceSize stride,
                            VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);

   struct dzn_cmd_buffer_query_pool_state *qpstate =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!qpstate)
      return;

   VkResult result =
      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
   if (result != VK_SUCCESS)
      return;

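   /* A single bulk copy is only possible when the destination layout matches
    * the collect-buffer layout exactly: 64-bit values, a stride equal to the
    * internal query size, no availability words, and (for pipeline
    * statistics) all counters requested, since D3D12 always writes the full
    * D3D12_QUERY_DATA_PIPELINE_STATISTICS struct.
    */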
   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
                   stride == qpool->query_size &&
                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
#define ALL_STATS \
   (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
   if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
       qpool->pipeline_statistics != ALL_STATS)
      raw_copy = false;
#undef ALL_STATS

   if (cmdbuf->enhanced_barriers) {
      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, qpool->collect_buffer,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
      }
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   if (raw_copy) {
      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                                  qpool->collect_buffer,
                                                  dzn_query_pool_get_result_offset(qpool, firstQuery),
                                                  dzn_query_pool_get_result_size(qpool, queryCount));
   } else {
      uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);

      for (uint32_t q = 0; q < queryCount; q++) {
         uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
         uint32_t dst_counter_offset = 0;

         if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
            for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
               if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
                  continue;

               ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                           qpool->collect_buffer,
                                                           res_offset + (c * sizeof(uint64_t)),
                                                           step);
               dst_counter_offset += step;
            }
         } else {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                                        qpool->collect_buffer,
                                                        res_offset, step);
            dst_counter_offset += step;
         }

         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
                                                        step);
         }

         dstOffset += stride;
      }
   }

   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
}

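/* Indirect dispatch needs the workgroup counts twice: once as the actual
 * ExecuteIndirect() dispatch arguments, and once to feed the compute sysvals
 * (gl_NumWorkGroups and friends), which is why the application-provided args
 * are duplicated into an internal buffer consumed by a custom command
 * signature.
 */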
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                        VkBuffer buffer,
                        VkDeviceSize offset)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_buffer, buf, buffer);

   cmdbuf->state.sysvals.compute.group_count_x = 0;
   cmdbuf->state.sysvals.compute.group_count_y = 0;
   cmdbuf->state.sysvals.compute.group_count_z = 0;
   cmdbuf->state.sysvals.compute.base_group_x = 0;
   cmdbuf->state.sysvals.compute.base_group_y = 0;
   cmdbuf->state.sysvals.compute.base_group_z = 0;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

   dzn_cmd_buffer_prepare_dispatch(cmdbuf);

   struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
   ID3D12CommandSignature *cmdsig =
      dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_COPY_DEST,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, buf->res,
                                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, buf->res, 0, 1,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

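   /* Two copies of the dispatch args: the command signature reads the first
    * one to update the sysval root constants, and the second one as the
    * actual dispatch arguments.
    */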
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
                                               buf->res,
                                               offset,
                                               sizeof(D3D12_DISPATCH_ARGUMENTS));
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
                                               buf->res,
                                               offset,
                                               sizeof(D3D12_DISPATCH_ARGUMENTS));

   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, exec_buf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
}

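/* wideLines is not exposed, so the only width the runtime should ever pass
 * here is 1.0.
 */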
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
                    float lineWidth)
{
   assert(lineWidth == 1.0f);
}

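/* Depth bias is dynamic D3D12 state only when the driver reports
 * DynamicDepthBiasSupported; otherwise it is baked into the PSO, and
 * changing it means switching to another pipeline variant.
 */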
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
                    float depthBiasConstantFactor,
                    float depthBiasClamp,
                    float depthBiasSlopeFactor)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
   cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
   cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
   cmdbuf->state.sysvals.gfx.depth_bias = depthBiasConstantFactor;
   if (pdev->options16.DynamicDepthBiasSupported)
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BIAS;
   else
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
                         const float blendConstants[4])
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   memcpy(cmdbuf->state.blend.constants, blendConstants,
          sizeof(cmdbuf->state.blend.constants));
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
                      float minDepthBounds,
                      float maxDepthBounds)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   if (pdev->options2.DepthBoundsTestSupported) {
      cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
      cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
   }
}

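/* D3D12 has no dynamic stencil compare/write masks: they are part of the
 * PSO, so the two entrypoints below update the pipeline-variant key and mark
 * the pipeline dirty in addition to recording the dynamic state. The stencil
 * reference, on the other hand, maps directly to OMSetStencilRef().
 */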
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
                             VkStencilFaceFlags faceMask,
                             uint32_t compareMask)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
      cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
      cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
   }

   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
      cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
      cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
   }

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
                           VkStencilFaceFlags faceMask,
                           uint32_t writeMask)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
      cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
      cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
   }

   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
      cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
      cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
   }

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
                           VkStencilFaceFlags faceMask,
                           uint32_t reference)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      cmdbuf->state.zsa.stencil_test.front.ref = reference;

   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      cmdbuf->state.zsa.stencil_test.back.ref = reference;

   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
}