/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_vk.h"

#include <cstdint>
#include <functional>
#include <vulkan/vulkan_core.h>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/containers/string_view.h>
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#include <core/plugin/intf_class_register.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/device/pipeline_state_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_backend_node.h>
#include <render/vulkan/intf_device_vk.h>

#include "perf/cpu_perf_scope.h"
#if (RENDER_PERF_ENABLED == 1)
#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#include "vulkan/gpu_query_vk.h"
#endif

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pool_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "render_backend.h"
#include "util/log.h"
#include "util/render_frame_util.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/gpu_semaphore_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

using CORE_NS::GetInstance;
using CORE_NS::IParallelTaskQueue;
using CORE_NS::IPerformanceDataManager;
using CORE_NS::IPerformanceDataManagerFactory;
using CORE_NS::ITaskQueueFactory;
using CORE_NS::IThreadPool;

RENDER_BEGIN_NAMESPACE()
namespace {
#if (RENDER_VULKAN_RT_ENABLED == 1)
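// NOTE: buffer device addresses require the bufferDeviceAddress feature (Vulkan 1.2 or
// VK_KHR_buffer_device_address); guarded by the RT flag here, as acceleration structure
// builds consume buffer addresses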
inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
{
    const VkBufferDeviceAddressInfo addressInfo {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
        nullptr,                                      // pNext
        buffer,                                       // buffer
    };
    return vkGetBufferDeviceAddress(device, &addressInfo);
}
#endif
#if (RENDER_PERF_ENABLED == 1)
void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
{
    dst.drawCount += src.drawCount;
    dst.drawIndirectCount += src.drawIndirectCount;
    dst.dispatchCount += src.dispatchCount;
    dst.dispatchIndirectCount += src.dispatchIndirectCount;
    dst.bindPipelineCount += src.bindPipelineCount;
    dst.renderPassCount += src.renderPassCount;
    dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
    dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
    dst.triangleCount += src.triangleCount;
    dst.instanceCount += src.instanceCount;
}
#endif
} // namespace

// Helper class for running std::function as a ThreadPool task.
class FunctionTask final : public IThreadPool::ITask {
public:
    static Ptr Create(std::function<void()> func)
    {
        return Ptr { new FunctionTask(BASE_NS::move(func)) };
    }

    explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    std::function<void()> func_;
};

#if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
namespace {
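// each GPU timer query uses two timestamps: one written at the beginning and one at the end
// of the measured range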
static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
}
#endif

RenderBackendVk::RenderBackendVk(Device& dev, GpuResourceManager& gpuResourceManager, CORE_NS::ITaskQueue* const queue)
    : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
      queue_(queue)
{
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();

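    // NOTE: the readback buffer holds one frameByteSize region per buffered frame
    // (fullByteSize below), presumably so CPU reads of timestamps do not race GPU writes
    // for frames still in flight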
    constexpr uint32_t maxQueryObjectCount { 512u };
    constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
    const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
    const GpuBufferDesc desc {
        BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT,                        // usageFlags
        CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
        0,                                                                              // engineCreationFlags
        fullByteSize,                                                                   // byteSize
    };
    perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
    perfGpuTimerData_.currentOffset = 0;
    perfGpuTimerData_.frameByteSize = byteSize;
    perfGpuTimerData_.fullByteSize = fullByteSize;
    { // zero initialize
        uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
        memset_s(bufferData, fullByteSize, 0, fullByteSize);
        perfGpuTimerData_.gpuBuffer->Unmap();
    }
#endif
#endif
}

void RenderBackendVk::AcquirePresentationInfo(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    RENDER_CPU_PERF_SCOPE("AcquirePresentationInfo", "");
    if (device_.HasSwapchain()) {
        presentationData_.present = true;
        // resized to the same size for convenience
        presentationData_.infos.resize(backBufferConfig.swapchainData.size());
        for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
            const auto& swapData = backBufferConfig.swapchainData[swapIdx];
            PresentationInfo pi;
            const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;

            if (const auto* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
                swapchain) {
                const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
                const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
                const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
                PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
                pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
                pi.swapchain = platSwapchain.swapchain;
                pi.useSwapchain = true;
                // NOTE: for legacy default backbuffer reasons the same swapchain might be present multiple times ATM
                for (const auto& piRef : presentationData_.infos) {
                    if (piRef.swapchain == pi.swapchain) {
                        pi.useSwapchain = false;
                    }
                }
                // NOTE: do not re-acquire the default backbuffer swapchain if it's in use with a different handle
                if (pi.useSwapchain) {
                    const VkResult result = vkAcquireNextImageKHR(device, // device
                        vkSwapchain,                                      // swapchain
                        UINT64_MAX,                                       // timeout
                        pi.swapchainSemaphore,                            // semaphore
                        (VkFence) nullptr,                                // fence
                        &pi.swapchainImageIndex);                         // pImageIndex

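                    // NOTE: VK_SUBOPTIMAL_KHR still delivers a usable image, so it counts as a
                    // valid acquire; hard failures bail out early, and unknown results invalidate
                    // the device below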
                    switch (result) {
                        // Success
                        case VK_SUCCESS:
                        case VK_TIMEOUT:
                        case VK_NOT_READY:
                        case VK_SUBOPTIMAL_KHR:
                            pi.validAcquire = true;
                            break;

                        // Failure
                        case VK_ERROR_OUT_OF_HOST_MEMORY:
                        case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
                            return;
                        case VK_ERROR_DEVICE_LOST:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
                            return;
                        case VK_ERROR_OUT_OF_DATE_KHR:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
                            return;
                        case VK_ERROR_SURFACE_LOST_KHR:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
                            return;

                        case VK_EVENT_SET:
                        case VK_EVENT_RESET:
                        case VK_INCOMPLETE:
                        case VK_ERROR_INITIALIZATION_FAILED:
                        case VK_ERROR_MEMORY_MAP_FAILED:
                        case VK_ERROR_LAYER_NOT_PRESENT:
                        case VK_ERROR_EXTENSION_NOT_PRESENT:
                        case VK_ERROR_FEATURE_NOT_PRESENT:
                        case VK_ERROR_INCOMPATIBLE_DRIVER:
                        case VK_ERROR_TOO_MANY_OBJECTS:
                        case VK_ERROR_FORMAT_NOT_SUPPORTED:
                        case VK_ERROR_FRAGMENTED_POOL:
                        case VK_ERROR_OUT_OF_POOL_MEMORY:
                        case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                        case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                        case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                        case VK_ERROR_VALIDATION_FAILED_EXT:
                        case VK_ERROR_INVALID_SHADER_NV:
                        // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
                        case VK_ERROR_FRAGMENTATION_EXT:
                        case VK_ERROR_NOT_PERMITTED_EXT:
                        // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
                        case VK_RESULT_MAX_ENUM:
                        default:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
                            PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
                            device_.SetDeviceStatus(false);
                            break;
                    }

                    if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
                        PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
                            static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
                    }

                    const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                    const RenderHandle handle = swapchainData.remappableSwapchainImage;
                    if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
                        // remap image to backbuffer
                        const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
                        // special swapchain remapping
                        gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
                    }
                    pi.renderGraphProcessedState = swapData.backBufferState;
                    pi.imageLayout = swapData.layout;
                    if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
                        pi.presentationLayoutChangeNeeded = true;
                        pi.renderNodeCommandListIndex =
                            static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);

                        const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
                        PLUGIN_ASSERT(swapImage);
                        pi.swapchainImage = swapImage->GetPlatformData().image;
                    }
                }
            }
            presentationData_.infos[swapIdx] = pi;
        }
    }
}

void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!queue_) {
        return;
    }
    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain() && presentationData_.present) {
            PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
            uint32_t swapchainCount = 0U;
            VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
                VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
            uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
            for (const auto& presRef : presentationData_.infos) {
                // NOTE: the default backbuffer might be present multiple times
                // the flag useSwapchain should be false in these cases
                if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
                    PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
                    vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
                    vkSwapchains[swapchainCount++] = presRef.swapchain;
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif

            // NOTE: currently waits for the last valid submission semaphore (backtracks here for a valid semaphore)
            if (swapchainCount > 0U) {
                VkSemaphore waitSemaphore = VK_NULL_HANDLE;
                uint32_t waitSemaphoreCount = 0;
                if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
                    waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
                    waitSemaphoreCount = 1;
                }

                const VkPresentInfoKHR presentInfo {
                    VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
                    nullptr,                            // pNext
                    waitSemaphoreCount,                 // waitSemaphoreCount
                    &waitSemaphore,                     // pWaitSemaphores
                    swapchainCount,                     // swapchainCount
                    vkSwapchains,                       // pSwapchains
                    vkSwapImageIndices,                 // pImageIndices
                    nullptr                             // pResults
                };
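
                // NOTE: pResults is null, so only the single aggregate VkResult returned by
                // vkQueuePresentKHR is inspected, not a per-swapchain result array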

                const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
                const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
                    &presentInfo);                                             // pPresentInfo

                switch (result) {
                        // Success
                    case VK_SUCCESS:
                        break;
                    case VK_SUBOPTIMAL_KHR:
#if (RENDER_VALIDATION_ENABLED == 1)
                        PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
#endif
                        break;

                        // Failure
                    case VK_ERROR_OUT_OF_HOST_MEMORY:
                    case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                        PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
                        return;
                    case VK_ERROR_DEVICE_LOST:
                        PLUGIN_LOG_E("vkQueuePresentKHR device lost");
                        return;
                    case VK_ERROR_OUT_OF_DATE_KHR:
                        PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
                        return;
                    case VK_ERROR_SURFACE_LOST_KHR:
                        PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
                        return;

                    case VK_NOT_READY:
                    case VK_TIMEOUT:
                    case VK_EVENT_SET:
                    case VK_EVENT_RESET:
                    case VK_INCOMPLETE:
                    case VK_ERROR_INITIALIZATION_FAILED:
                    case VK_ERROR_MEMORY_MAP_FAILED:
                    case VK_ERROR_LAYER_NOT_PRESENT:
                    case VK_ERROR_EXTENSION_NOT_PRESENT:
                    case VK_ERROR_FEATURE_NOT_PRESENT:
                    case VK_ERROR_INCOMPATIBLE_DRIVER:
                    case VK_ERROR_TOO_MANY_OBJECTS:
                    case VK_ERROR_FORMAT_NOT_SUPPORTED:
                    case VK_ERROR_FRAGMENTED_POOL:
                    case VK_ERROR_OUT_OF_POOL_MEMORY:
                    case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                    case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                    case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                    case VK_ERROR_VALIDATION_FAILED_EXT:
                    case VK_ERROR_INVALID_SHADER_NV:
                    case VK_ERROR_FRAGMENTATION_EXT:
                    case VK_ERROR_NOT_PERMITTED_EXT:
                    case VK_RESULT_MAX_ENUM:
                    default:
                        PLUGIN_LOG_E("vkQueuePresentKHR unknown error");
                        PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
                        break;
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        } else {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
#endif
        }
    }
}

void RenderBackendVk::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!queue_) {
        return;
    }

    // NOTE: all command lists are validated before entering here
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif

    commandBufferSubmitter_ = {};
    commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());

    presentationData_.present = false;
    presentationData_.infos.clear();

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif

    // global begin backend frame
    auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
    descriptorSetMgr.BeginBackendFrame();

    // command list process loop/execute
    // first tries to acquire the swapchain in a task, if needed
    RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
    commonCpuTimers_.submit.Begin();
#endif

    PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
    // submit vulkan command buffers
    // checks that presentation info has a valid acquire
    RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.submit.End();
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

void RenderBackendVk::RenderProcessSubmitCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: currently backtracks to the final valid command buffer semaphore
    uint32_t finalCommandBufferSubmissionIndex = ~0u;
    commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
    bool swapchainSemaphoreWaited = false;
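    // scan backwards for the last context with a real command buffer and semaphore; that
    // submission signals the frame fence, the presentation wait semaphore, and any external
    // GPU signals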
    for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
         --cmdBufferIdx) {
        if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
            (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
                VK_NULL_HANDLE)) {
            finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
            break;
        }
    }

    for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
        const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
        if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
            continue;
        }

        const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];

        uint32_t waitSemaphoreCount = 0u;
        constexpr uint32_t maxWaitSemaphoreCount =
            PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
        VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
        VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
        for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
            const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
            PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());

            VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
            if (waitSemaphore != VK_NULL_HANDLE) {
                waitSemaphores[waitSemaphoreCount] = waitSemaphore;
                waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                waitSemaphoreCount++;
            }
        }

        if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
            (!presentationData_.infos.empty())) {
            swapchainSemaphoreWaited = true;
            // go through all swapchain semaphores
            for (const auto& presRef : presentationData_.infos) {
                if (presRef.swapchainSemaphore) {
                    waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
                    waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
                    waitSemaphoreCount++;
                }
            }
        }

        uint32_t signalSemaphoreCount = 0u;
        PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
        constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
        VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
            VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
        VkFence fence = VK_NULL_HANDLE;
        if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
            // add fence signaling to the last submission for frame sync
            if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
                fence = frameSync->GetFrameFence().fence;
                frameSync->FrameFenceIsSignalled();
            }
            // signal external semaphores
            if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
                auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
                const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
                PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
                if (externalSignals.size() == externalSemaphores.size()) {
                    for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                        // only signal semaphores which have not been signaled yet
                        if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                            if (const auto* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get(); gs) {
                                semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
                                externalSignals[sigIdx].signaled = true;
                            }
                        }
                    }
                }
            }

            if (presentationData_.present) {
                commandBufferSubmitter_.presentationWaitSemaphore =
                    commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
                semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
            }
            // add additional semaphores
            for (const auto& swapRef : backBufferConfig.swapchainData) {
                // should have been checked in the render graph already
                if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
                    semaphores[signalSemaphoreCount++] =
                        VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
                }
            }
        } else if (renderContextRef.submitDepencies.signalSemaphore) {
            semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
        }
        PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);

        const VkSubmitInfo submitInfo {
            VK_STRUCTURE_TYPE_SUBMIT_INFO,                        // sType
            nullptr,                                              // pNext
            waitSemaphoreCount,                                   // waitSemaphoreCount
            (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
            waitSemaphorePipelineStageFlags,                      // pWaitDstStageMask
            1,                                                    // commandBufferCount
            &cmdSubmitterRef.commandBuffer,                       // pCommandBuffers
            signalSemaphoreCount,                                 // signalSemaphoreCount
            (signalSemaphoreCount == 0) ? nullptr : semaphores,   // pSignalSemaphores
        };
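
        // NOTE: fence is VK_NULL_HANDLE for all but the final submission, which signals the
        // frame fence used for CPU-GPU frame synchronization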

        const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
        if (queue) {
            RENDER_CPU_PERF_SCOPE("vkQueueSubmit", "");
            VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
                1,                                  // submitCount
                &submitInfo,                        // pSubmits
                fence));                            // fence
        }
    }
}

void RenderBackendVk::RenderProcessCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // queue checked in upper level

    const auto cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
    constexpr uint64_t acquireTaskId { 0xFFFFffff0 };
    constexpr uint64_t globalDescSetTaskId { 0xFFFFffff1 };
    bool acquireSubmitted { false };
    bool globalDescSetSubmitted { false };
    vector<uint64_t> afterIdentifiers;
    afterIdentifiers.reserve(2U); // global descriptor sets, and swapchain acquire wait
    // submit the global descriptor set task if needed
    {
        auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
        const auto& allDescSets = descriptorSetMgr.GetUpdateDescriptorSetHandles();
        if (!allDescSets.empty()) {
            globalDescSetSubmitted = true;
            queue_->Submit(globalDescSetTaskId, FunctionTask::Create([this]() { UpdateGlobalDescriptorSets(); }));
        }
    }
    // submit the acquire task if needed
    if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
        acquireSubmitted = true;
        queue_->Submit(acquireTaskId, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
            AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
        }));
    }
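    // NOTE: the command list recording tasks below run in parallel on the task queue;
    // ordering against the acquire and global descriptor set tasks is expressed through
    // afterIdentifiers passed to SubmitAfter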
    uint64_t secondaryIdx = cmdBufferCount;
    for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
        afterIdentifiers.clear();
        // add a wait for acquire if needed
        if (acquireSubmitted && (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx)) {
            afterIdentifiers.push_back(acquireTaskId);
        }
        // NOTE: idx increase
        const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
        const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
        PLUGIN_ASSERT(mrpData.subpassCount > 0);
        const uint32_t rcCount = mrpData.subpassCount;
        // add a wait for global descriptor sets if needed
        // always add a safety wait for secondary command lists (NOTE: needs to be further optimized)
        if (globalDescSetSubmitted &&
            (mrpData.secondaryCmdLists || ref.renderCommandList->HasGlobalDescriptorSetBindings())) {
            afterIdentifiers.push_back(globalDescSetTaskId);
        }
        if (mrpData.secondaryCmdLists) {
            afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
            for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
                const uint64_t submitId = secondaryIdx++;
                afterIdentifiers.push_back(submitId);
                PLUGIN_ASSERT((cmdBufferIdx + secondIdx) < cmdBufferCount);
                queue_->SubmitAfter(afterIdentifiers, submitId,
                    FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
                        const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
                        MultiRenderCommandListDesc mrcDesc;
                        mrcDesc.multiRenderCommandListCount = 1u;
                        mrcDesc.baseContext = nullptr;
                        mrcDesc.secondaryCommandBuffer = true;
                        RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
                        const DebugNames debugNames { ref2.debugName,
                            renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
                        RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
                    }));
            }
            queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
                cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                    MultiRenderCommandListDesc mrcDesc;
                    mrcDesc.multiRenderCommandListCount = rcCount;
                    RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
                    const DebugNames debugNames { ref2.debugName,
                        renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                    RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
                }));
        } else {
            queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
                cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                    MultiRenderCommandListDesc mrcDesc;
                    mrcDesc.multiRenderCommandListCount = rcCount;
                    if (rcCount > 1) {
                        mrcDesc.multiRenderNodeCmdList = true;
                        mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
                    }
                    for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                        const uint32_t currIdx = cmdBufferIdx + rcIdx;
                        mrcDesc.multiRenderCommandListIndex = rcIdx;
                        RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                        const DebugNames debugNames { ref2.debugName,
                            renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                        RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
                    }
                }));
        }
        // idx increase
        cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
    }

    // execute and wait for completion.
    queue_->Execute();
    queue_->Clear();
}

void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
    RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
{
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;

    const ContextCommandPoolVk& ptrCmdPool =
        (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;

    // begin cmd buffer
    const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
    constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
    const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
    if (valid) {
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool.commandPool,                   // commandPool
            commandPoolResetFlags));                  // flags
    }

    constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
        VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
    };
    const VkCommandBufferBeginInfo commandBufferBeginInfo {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
        nullptr,                                     // pNext
        commandBufferUsageFlags,                     // flags
        nullptr,                                     // pInheritanceInfo
    };
    if (valid) {
        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo
    }

    StateCache stateCache;

    const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
    const auto commandCount = static_cast<uint32_t>(rcRef.size());
    const RenderCommandBeginRenderPass* rcBeginRenderPass =
        (mrpcld.rpBeginCmdIndex < commandCount)
            ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
            : nullptr;
    const RenderCommandEndRenderPass* rcEndRenderPass =
        (mrpcld.rpEndCmdIndex < commandCount)
            ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
            : nullptr;

    if (rcBeginRenderPass && rcEndRenderPass) {
        if (mrpcld.rpBarrierCmdIndex < commandCount) {
            const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
            PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
            const RenderCommandBarrierPoint& barrierPoint =
                *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
            // handle all barriers before the render command that needs resource syncing
            RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
        }

        // begin render pass
        stateCache.primaryRenderPass = true;
        RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
        stateCache.primaryRenderPass = false;

        // get secondary command buffers from the correct indices and execute
        for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
            const uint32_t currCmdBufIdx = cmdBufIdx + idx;
            PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
            const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
            NodeContextPoolManagerVk& contextPoolVk =
                *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);

            const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
            const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
            const auto mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
            // next subpass only called from the second render pass on
            if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
                RenderCommandBeginRenderPass renderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
                renderPass.renderPassDesc.subpassContents =
                    SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
                stateCache.renderCommandBeginRenderPass = nullptr; // reset
                RenderCommand(
                    renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
            }
            RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
        }

        // end render pass (replaces the primary render pass)
        stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
        // NOTE: the render graph has batched the subpasses to have END_SUBPASS; we need END_RENDER_PASS
        constexpr RenderCommandEndRenderPass rcerp = {};
        RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
    }

    // end cmd buffer
    if (valid) {
        VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
    }

    commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
}

void RenderBackendVk::RenderExecuteSecondaryCommandLists(
    const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
{
    if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
        vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
            1u,                                       // commandBufferCount
            &executeCmdBuffer.commandBuffer);         // pCommandBuffers
    }
}

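// NOTE: a secondary command buffer recorded with RENDER_PASS_CONTINUE must declare the
// render pass and subpass it executes in via VkCommandBufferInheritanceInfo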
VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
    const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
{
    auto& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
    const auto cmdCount = static_cast<uint32_t>(rcRef.size());

    const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
    PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
    PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
    if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
        const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
        PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
        const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
        LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);

        const uint32_t subpass = renderCmd.subpassStartIndex;
        return VkCommandBufferInheritanceInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
            nullptr,                                           // pNext
            lowLevelRenderPassData.renderPass,                 // renderPass
            subpass,                                           // subpass
            VK_NULL_HANDLE,                                    // framebuffer
            VK_FALSE,                                          // occlusionQueryEnable
            0,                                                 // queryFlags
            0,                                                 // pipelineStatistics
        };
    } else {
        return VkCommandBufferInheritanceInfo {};
    }
}

void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
{
    // these are validated in the render graph
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
    const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
#endif
    PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
    PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
    if (perfDataSet) {
        perfDataSet->cpuTimer.Begin();
    }

    RENDER_CPU_PERF_SCOPE("RenderSingleCommandList", debugNames.renderCommandBufferName);
#endif

    contextPoolMgr.BeginBackendFrame();
    ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
    nodeContextPsoMgr.BeginBackendFrame();

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();

    StateCache stateCache = {}; // state cache for this render command list
    stateCache.backendNode = renderCommandCtx.renderBackendNode;
    stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;

    // the command buffer has been waited on with a single frame fence
    const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
    const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
    const bool endCommandBuffer =
        (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
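    // NOTE: multiple render node command lists can share one VkCommandBuffer; it is begun by
    // the first list in the chain and ended by the last one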
    const ContextCommandPoolVk* ptrCmdPool = nullptr;
    if (mrclDesc.multiRenderNodeCmdList) {
        PLUGIN_ASSERT(mrclDesc.baseContext);
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
                          ->GetContextCommandPool();
    } else if (mrclDesc.secondaryCommandBuffer) {
        PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
    } else {
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
    }

    // update cmd list context descriptor sets
    UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);

    PLUGIN_ASSERT(ptrCmdPool);
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;

    if (beginCommandBuffer) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool->commandPool,                  // commandPool
            commandPoolResetFlags));                  // flags

        VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
        VkCommandBufferInheritanceInfo inheritanceInfo {};
        if (stateCache.secondaryCommandBuffer) {
            commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
            inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
        }
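        // NOTE: RENDER_PASS_CONTINUE_BIT marks the secondary command buffer as executing
        // entirely inside a render pass, which is why the inheritance info above is required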
        const VkCommandBufferBeginInfo commandBufferBeginInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,                    // sType
            nullptr,                                                        // pNext
            commandBufferUsageFlags,                                        // flags
            mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
        };

        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
            PLUGIN_ASSERT(gpuQuery);

            gpuQuery->NextQueryIndex();

            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
        }
#endif
#endif
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    {
        BeginDebugMarker(cmdBuffer, debugNames.renderCommandListName, { 1.f, 1.f, 1.f, 1.f });
    }
#endif

    for (const auto& ref : rcRef) {
        if (!stateCache.validCommandList) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
                "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
                debugNames.renderCommandListName.data());
#endif
            break;
        }

        PLUGIN_ASSERT(ref.rc);
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        {
            const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
            BeginDebugMarker(cmdBuffer, COMMAND_NAMES[index], { 0.87f, 0.83f, 0.29f, 1.f });
        }
#endif

        switch (ref.type) {
            case RenderCommandType::BARRIER_POINT: {
                if (!stateCache.secondaryCommandBuffer) {
                    const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
                    // handle all barriers before the render command that needs resource syncing
                    RenderCommand(
                        barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
                }
                break;
            }
            case RenderCommandType::DRAW: {
                RenderCommand(
                    *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH: {
                RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::CLEAR_COLOR_IMAGE: {
                RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
                RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_STENCIL: {
                RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
                RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
1085                 break;
1086             }
1087             case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1088                 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1089                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1090                 break;
1091             }
1092             //
1093             case RenderCommandType::WRITE_TIMESTAMP: {
1094                 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1095                     contextPoolMgr, stateCache);
1096                 break;
1097             }
1098             case RenderCommandType::UNDEFINED:
1099             case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1100             case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1101             case RenderCommandType::BEGIN_DEBUG_MARKER:
1102 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1103                 RenderCommand(*static_cast<RenderCommandBeginDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1104                     contextPoolMgr, stateCache);
1105 #endif
1106                 break;
1107             case RenderCommandType::END_DEBUG_MARKER:
1108 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1109                 RenderCommand(*static_cast<RenderCommandEndDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1110                     contextPoolMgr, stateCache);
1111 #endif
1112                 break;
            default: {
                PLUGIN_ASSERT(false && "invalid render command");
                break;
            }
        }
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        {
            EndDebugMarker(cmdBuffer);
        }
#endif
    }

    if (!presentationData_.infos.empty()) {
        RenderPresentationLayout(cmdBuffer, cmdBufIdx);
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
        deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
    }
#endif

#if (RENDER_PERF_ENABLED == 1)
    // copy counters
    if (perfDataSet) {
        CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
    }
#endif

    if (endCommandBuffer) {
#if (RENDER_PERF_ENABLED == 1)
        if (perfDataSet) {
            perfDataSet->cpuTimer.End();
        }
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
        }
#endif
        CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
#endif

        VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer

        if (mrclDesc.secondaryCommandBuffer) {
            commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
        } else {
            commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
        }
    }
}

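// Binds the compute or graphics pipeline for the given PSO handle; the currently bound
// pipeline is cached in StateCache so redundant vkCmdBindPipeline calls are skipped.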
void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
{
    const RenderHandle psoHandle = renderCmd.psoHandle;
    const auto pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;

    stateCache.psoHandle = psoHandle;

    VkPipeline pipeline { VK_NULL_HANDLE };
    VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
    if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
        const auto* pso = static_cast<const ComputePipelineStateObjectVk*>(
            psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
        if (pso) {
            const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
            pipeline = plat.pipeline;
            pipelineLayout = plat.pipelineLayout;
        }
    } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
        PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
        if (stateCache.renderCommandBeginRenderPass) {
            uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
            if (stateCache.pipelineDescSetHash != 0) {
                HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
            }
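            // graphics PSOs are specialized per render pass: the lookup hash combines the
            // render pass compatibility hash with the descriptor set hash (when non-zero)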
            const auto* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
                psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
                    stateCache.renderCommandBeginRenderPass->subpasses,
                    stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
                    &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
            if (pso) {
                const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
                pipeline = plat.pipeline;
                pipelineLayout = plat.pipelineLayout;
            }
        }
    }

    // NOTE: the render front-end expects PSO binding after begin render pass;
    // in some situations the render pass changes and therefore the pipeline changes as well,
    // in others the render pass is the same and rebinding is not needed
    const bool newPipeline = (pipeline != stateCache.pipeline);
    const bool valid = (pipeline != VK_NULL_HANDLE);
    if (valid && newPipeline) {
        stateCache.pipeline = pipeline;
        stateCache.pipelineLayout = pipelineLayout;
        stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
        vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
            pipelineBindPoint,                  // pipelineBindPoint
            pipeline);                          // pipeline
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.bindPipelineCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    if (stateCache.validBindings) {
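        // a non-zero indexCount selects an indexed draw; otherwise a regular draw is recorded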
        if (renderCmd.indexCount) {
            vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
                renderCmd.indexCount,              // indexCount
                renderCmd.instanceCount,           // instanceCount
                renderCmd.firstIndex,              // firstIndex
                renderCmd.vertexOffset,            // vertexOffset
                renderCmd.firstInstance);          // firstInstance
#if (RENDER_PERF_ENABLED == 1)
            stateCache.perfCounters.drawCount++;
            stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
            stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
        } else {
            vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
                renderCmd.vertexCount,      // vertexCount
                renderCmd.instanceCount,    // instanceCount
                renderCmd.firstVertex,      // firstVertex
                renderCmd.firstInstance);   // firstInstance
#if (RENDER_PERF_ENABLED == 1)
            stateCache.perfCounters.drawCount++;
            stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
            stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
                                                     * renderCmd.instanceCount;
#endif
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    if (stateCache.validBindings) {
        if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
            const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
            const VkBuffer buffer = plat.buffer;
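            // plat.currentByteOffset accounts for dynamically mapped (ring buffer) GPU buffers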
            const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
            if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
                vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
                    buffer,                                    // buffer
                    offset,                                    // offset
                    renderCmd.drawCount,                       // drawCount
                    renderCmd.stride);                         // stride
            } else {
                vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
                    buffer,                             // buffer
                    offset,                             // offset
                    renderCmd.drawCount,                // drawCount
                    renderCmd.stride);                  // stride
            }
#if (RENDER_PERF_ENABLED == 1)
            stateCache.perfCounters.drawIndirectCount++;
#endif
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    if (stateCache.validBindings) {
        vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
            renderCmd.groupCountX,          // groupCountX
            renderCmd.groupCountY,          // groupCountY
            renderCmd.groupCountZ);         // groupCountZ
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.dispatchCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (stateCache.validBindings) {
        if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
            const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
            const VkBuffer buffer = plat.buffer;
            const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
            vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
                buffer,                                 // buffer
                offset);                                // offset
#if (RENDER_PERF_ENABLED == 1)
            stateCache.perfCounters.dispatchIndirectCount++;
#endif
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
    stateCache.renderCommandBeginRenderPass = &renderCmd;

    auto& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
    // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
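    // NOTE: the pool manager presumably caches and reuses the VkRenderPass/VkFramebuffer
    // objects referenced below via lowLevelRenderPassData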
    stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);

    // early out for multi render command list render pass
    if (stateCache.secondaryCommandBuffer) {
        return; // early out
    }
    const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
                            (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
    // invalidate the whole command list
    if (!validRpFbo) {
        stateCache.validCommandList = false;
        return; // early out
    }

    if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
        if (renderCmd.subpassStartIndex < renderCmd.subpasses.size()) {
            if ((renderCmd.subpasses[renderCmd.subpassStartIndex].subpassFlags &
                    SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
                const auto subpassContents = static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
                vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
                    subpassContents);                  // contents
            }
        }
        return; // early out
    }

    const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;

    VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
    bool hasClearValues = false;
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        const auto& ref = renderPassDesc.attachments[idx];
        if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
            ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
            const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
            VkClearValue cVal;
            if (RenderHandleUtil::IsDepthImage(handle)) {
                PLUGIN_STATIC_ASSERT(sizeof(cVal.depthStencil) == sizeof(ref.clearValue.depthStencil));
                cVal.depthStencil.depth = ref.clearValue.depthStencil.depth;
                cVal.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
            } else {
                PLUGIN_STATIC_ASSERT(sizeof(cVal.color) == sizeof(ref.clearValue.color));
                CloneData(&cVal.color, sizeof(cVal.color), &ref.clearValue.color, sizeof(ref.clearValue.color));
            }
            clearValues[idx] = cVal;
            hasClearValues = true;
        }
    }

    // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
    // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
    const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;

    VkRect2D renderArea {
        { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
        { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
    };
    // render area needs to be inside the framebuffer
    const auto& lowLevelData = stateCache.lowLevelRenderPassData;
    renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
    renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
    renderArea.extent.width = Math::min(renderArea.extent.width,
        static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
    renderArea.extent.height = Math::min(renderArea.extent.height,
        static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));

    const VkRenderPassBeginInfo renderPassBeginInfo {
        VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,      // sType
        nullptr,                                       // pNext
        stateCache.lowLevelRenderPassData.renderPass,  // renderPass
        stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
        renderArea,                                    // renderArea
        clearValueCount,                               // clearValueCount
        clearValues,                                   // pClearValues
    };

    // NOTE: could be patched in the render graph
    // const VkSubpassContents subpassContents = (VkSubpassContents)renderPassDesc.subpassContents;
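    // when the render pass spans multiple render command lists, the primary command buffer
    // records the pass with secondary-command-buffer contents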
    const VkSubpassContents subpassContents =
        stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
    vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
        &renderPassBeginInfo,                  // pRenderPassBegin
        subpassContents);                      // contents
#if (RENDER_PERF_ENABLED == 1)
    stateCache.perfCounters.renderPassCount++;
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);

    const auto subpassContents = (VkSubpassContents)renderCmd.subpassContents;
    vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
        subpassContents);                  // contents
}

void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);

    // early out for multi render command list render pass
    if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
        return; // early out
    }

    stateCache.renderCommandBeginRenderPass = nullptr;
    stateCache.lowLevelRenderPassData = {};

    if (!stateCache.secondaryCommandBuffer) {
        vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
    PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);

    const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;

    VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    const GpuBufferVk* gpuBuffer = nullptr;
    RenderHandle currBufferHandle;
    for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
        const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
        // the importer usually uses the same GPU buffer for all vertex buffers in a single primitive;
        // do not re-fetch the buffer if not needed
        if (currBufferHandle.id != currVb.bufferHandle.id) {
            currBufferHandle = currVb.bufferHandle;
            gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
        }
        if (gpuBuffer) {
            const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
            const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
            vertexBuffers[idx] = plat.buffer;
            offsets[idx] = offset;
        }
    }

    vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
        0,                                       // firstBinding
        vertexBufferCount,                       // bindingCount
        vertexBuffers,                           // pBuffers
        offsets);                                // pOffsets
}

void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);

    PLUGIN_ASSERT(gpuBuffer);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
        const auto indexType = (VkIndexType)renderCmd.indexBuffer.indexType;

        vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
            buffer,                                // buffer
            offset,                                // offset
            indexType);                            // indexType
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
    const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    if (srcImagePtr && dstImagePtr) {
        const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
        const auto& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();

        const ImageBlit& ib = renderCmd.imageBlit;
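        // GPU_IMAGE_ALL_LAYERS resolves to the actual array layer count of the image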
        const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? srcPlatImage.arrayLayers
                                           : ib.srcSubresource.layerCount;
        const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? dstPlatImage.arrayLayers
                                           : ib.dstSubresource.layerCount;

        const VkImageSubresourceLayers srcSubresourceLayers {
            (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
            ib.srcSubresource.mipLevel,                             // mipLevel
            ib.srcSubresource.baseArrayLayer,                       // baseArrayLayer
            srcLayerCount,                                          // layerCount
        };
        const VkImageSubresourceLayers dstSubresourceLayers {
            (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
            ib.dstSubresource.mipLevel,                             // mipLevel
            ib.dstSubresource.baseArrayLayer,                       // baseArrayLayer
            dstLayerCount,                                          // layerCount
        };

        const VkImageBlit imageBlit {
            srcSubresourceLayers, // srcSubresource
            { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
                { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
                    (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
            dstSubresourceLayers,                        // dstSubresource
            { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
                { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
                    (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
        };

        vkCmdBlitImage(cmdBuf.commandBuffer,         // commandBuffer
            srcPlatImage.image,                      // srcImage
            (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout
            dstPlatImage.image,                      // dstImage
            (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
            1,                                       // regionCount
            &imageBlit,                              // pRegions
            (VkFilter)renderCmd.filter);             // filter
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
    const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);

    PLUGIN_ASSERT(srcGpuBuffer);
    PLUGIN_ASSERT(dstGpuBuffer);

    if (srcGpuBuffer && dstGpuBuffer) {
        const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
        const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
        const VkBufferCopy bufferCopy {
            renderCmd.bufferCopy.srcOffset,
            renderCmd.bufferCopy.dstOffset,
            renderCmd.bufferCopy.size,
        };

        if (bufferCopy.size > 0) {
            vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
                srcBuffer,                        // srcBuffer
                dstBuffer,                        // dstBuffer
                1,                                // regionCount
                &bufferCopy);                     // pRegions
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
        PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        return;
    }

    const GpuBufferVk* gpuBuffer = nullptr;
    const GpuImageVk* gpuImage = nullptr;
    if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
        gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
        gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    } else {
        gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
        gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
    }

    if (gpuBuffer && gpuImage) {
        const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
        const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
        const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
        const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                        ? platImage.arrayLayers
                                        : subresourceLayer.layerCount;
        const VkImageSubresourceLayers imageSubresourceLayer {
            (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
            subresourceLayer.mipLevel,
            subresourceLayer.baseArrayLayer,
            layerCount,
        };
        const GpuImageDesc& imageDesc = gpuImage->GetDesc();
        // Math::min to force staying inside image
        const uint32_t mip = subresourceLayer.mipLevel;
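        // extent of the selected mip level (width/height shifted per mip; depth used as-is)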
        const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
        const Size3D& imageOffset = bufferImageCopy.imageOffset;
        const VkExtent3D imageExtent = {
            Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
            Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
            Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
        };
        const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
                           (imageOffset.depth < imageSize.depth);
        const VkBufferImageCopy bufferImageCopyVk {
            bufferImageCopy.bufferOffset,
            bufferImageCopy.bufferRowLength,
            bufferImageCopy.bufferImageHeight,
            imageSubresourceLayer,
            { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
                static_cast<int32_t>(imageOffset.depth) },
            imageExtent,
        };

        const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
        const VkImage image = (gpuImage->GetPlatformData()).image;

        if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
            vkCmdCopyBufferToImage(cmdBuf.commandBuffer,             // commandBuffer
                buffer,                                              // srcBuffer
                image,                                               // dstImage
                VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
                1,                                                   // regionCount
                &bufferImageCopyVk);                                 // pRegions
        } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
            vkCmdCopyImageToBuffer(cmdBuf.commandBuffer,             // commandBuffer
                image,                                               // srcImage
                VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
                buffer,                                              // dstBuffer
                1,                                                   // regionCount
                &bufferImageCopyVk);                                 // pRegions
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
    const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    if (srcGpuImage && dstGpuImage) {
        const ImageCopy& copy = renderCmd.imageCopy;
        const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
        const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;

        const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
        const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
        const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? srcPlatImage.arrayLayers
                                           : srcSubresourceLayer.layerCount;
        const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? dstPlatImage.arrayLayers
                                           : dstSubresourceLayer.layerCount;

        const VkImageSubresourceLayers srcImageSubresourceLayer {
            (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
            srcSubresourceLayer.mipLevel,
            srcSubresourceLayer.baseArrayLayer,
            srcLayerCount,
        };
        const VkImageSubresourceLayers dstImageSubresourceLayer {
            (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
            dstSubresourceLayer.mipLevel,
            dstSubresourceLayer.baseArrayLayer,
            dstLayerCount,
        };

        const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
        const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();

        VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
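        // clamp the copy extent to stay within both the source and destination images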
        ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
        ext.height =
            Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
        ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));

        const VkImageCopy imageCopyVk {
            srcImageSubresourceLayer,                                 // srcSubresource
            { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
            dstImageSubresourceLayer,                                 // dstSubresource
            { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
            ext,                                                      // extent
        };
        vkCmdCopyImage(cmdBuf.commandBuffer,                     // commandBuffer
            srcPlatImage.image,                                  // srcImage
            VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
            dstPlatImage.image,                                  // dstImage
            VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
            1,                                                   // regionCount
            &imageCopyVk);                                       // pRegions
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
    const RenderBarrierList& rbl)
{
    if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
        return;
    }

    const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
        rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
    PLUGIN_ASSERT(barrierPointBarriers);
    if (!barrierPointBarriers) {
        return;
    }
    constexpr uint32_t maxBarrierCount { 8 };
    VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
    VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
    VkMemoryBarrier memoryBarriers[maxBarrierCount];
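    // barriers are accumulated into these fixed-size arrays and flushed with a single
    // vkCmdPipelineBarrier whenever an array fills up or the end of the list is reached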

    // generally there is only a single barrier list per barrier point,
    // but with batched render passes there can be many
    // NOTE: all barrier lists could be patched to a single vk command if needed
    // NOTE: memory and pipeline barriers should be allowed on the front-end side
    const auto barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
    const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
#if (RENDER_VALIDATION_ENABLED == 1)
    uint32_t fullBarrierCount = 0u;
#endif
    for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
        if (nextBarrierList == nullptr) { // cannot be null; just a safety check
            PLUGIN_ASSERT(false);
            return;
        }
        const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
        nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
        const auto barrierCount = (uint32_t)barrierListRef.count;

        uint32_t bufferBarrierIdx = 0;
        uint32_t imageBarrierIdx = 0;
        uint32_t memoryBarrierIdx = 0;

        VkPipelineStageFlags srcPipelineStageMask { 0 };
        VkPipelineStageFlags dstPipelineStageMask { 0 };
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };

        for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
            const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];

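            // queue family ownership transfer indices are only set when the barrier crosses GPU queue types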
            uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
                srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
                dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
            }

            const RenderHandle resourceHandle = ref.resourceHandle;
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);

            PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
                          (handleType == RenderHandleType::GPU_IMAGE));

            const auto srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
            const auto dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);

            srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
            dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);

            // NOTE: zero size buffer barriers allowed ATM
            if (handleType == RenderHandleType::GPU_BUFFER) {
                if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
                    const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
                    // mapped currentByteOffset (dynamic ring buffer offset) taken into account
                    const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
                    const VkDeviceSize size =
                        Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
                            (VkDeviceSize)ref.dst.optionalByteSize);
                    if (platBuffer.buffer) {
                        bufferMemoryBarriers[bufferBarrierIdx++] = {
                            VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
                            nullptr,                                 // pNext
                            srcAccessMask,                           // srcAccessMask
                            dstAccessMask,                           // dstAccessMask
                            srcQueueFamilyIndex,                     // srcQueueFamilyIndex
                            dstQueueFamilyIndex,                     // dstQueueFamilyIndex
                            platBuffer.buffer,                       // buffer
                            offset,                                  // offset
                            size,                                    // size
                        };
                    }
                }
            } else if (handleType == RenderHandleType::GPU_IMAGE) {
                if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
                    const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();

                    const auto srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
                    const auto dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);

                    const VkImageAspectFlags imageAspectFlags =
                        (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
                            ? platImage.aspectFlags
                            : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;

                    const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
                                                    PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
                                                    ? VK_REMAINING_MIP_LEVELS
                                                    : ref.src.optionalImageSubresourceRange.levelCount;

                    const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
                                                    PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                                    ? VK_REMAINING_ARRAY_LAYERS
                                                    : ref.src.optionalImageSubresourceRange.layerCount;

                    const VkImageSubresourceRange imageSubresourceRange {
                        imageAspectFlags,                                     // aspectMask
                        ref.src.optionalImageSubresourceRange.baseMipLevel,   // baseMipLevel
                        levelCount,                                           // levelCount
                        ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
                        layerCount,                                           // layerCount
                    };

                    if (platImage.image) {
                        imageMemoryBarriers[imageBarrierIdx++] = {
                            VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
                            nullptr,                                // pNext
                            srcAccessMask,                          // srcAccessMask
                            dstAccessMask,                          // dstAccessMask
                            srcImageLayout,                         // oldLayout
                            dstImageLayout,                         // newLayout
                            srcQueueFamilyIndex,                    // srcQueueFamilyIndex
                            dstQueueFamilyIndex,                    // dstQueueFamilyIndex
                            platImage.image,                        // image
                            imageSubresourceRange,                  // subresourceRange
                        };
                    }
                }
            } else {
                memoryBarriers[memoryBarrierIdx++] = {
                    VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
                    nullptr,                          // pNext
                    srcAccessMask,                    // srcAccessMask
                    dstAccessMask,                    // dstAccessMask
                };
            }

            const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
            const bool resetBarriers = ((bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
                                        (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1)));

            if (hasBarriers && resetBarriers) {
#if (RENDER_VALIDATION_ENABLED == 1)
                fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
#endif
                vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
                    srcPipelineStageMask,                  // srcStageMask
                    dstPipelineStageMask,                  // dstStageMask
                    dependencyFlags,                       // dependencyFlags
                    memoryBarrierIdx,                      // memoryBarrierCount
                    memoryBarriers,                        // pMemoryBarriers
                    bufferBarrierIdx,                      // bufferMemoryBarrierCount
                    bufferMemoryBarriers,                  // pBufferMemoryBarriers
                    imageBarrierIdx,                       // imageMemoryBarrierCount
                    imageMemoryBarriers);                  // pImageMemoryBarriers

                bufferBarrierIdx = 0;
                imageBarrierIdx = 0;
                memoryBarrierIdx = 0;
            }
        }
    }
#if (RENDER_VALIDATION_ENABLED == 1)
    if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
        PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
            "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
            barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
    }
#endif
}

namespace {
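// running write indices into the preallocated descriptor write data arrays (wd)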
1888 struct DescriptorSetUpdateDataStruct {
1889     uint32_t accelIndex { 0U };
1890     uint32_t bufferIndex { 0U };
1891     uint32_t imageIndex { 0U };
1892     uint32_t samplerIndex { 0U };
1893     uint32_t writeBindIdx { 0U };
1894 };
1895 
void UpdateSingleDescriptorSet(const GpuResourceManager& gpuResourceMgr, RenderBackendVk::StateCache* stateCache,
    const LowLevelDescriptorSetVk* descriptorSet, const DescriptorSetLayoutBindingResourcesHandler& bindingResources,
    LowLevelContextDescriptorWriteDataVk& wd, DescriptorSetUpdateDataStruct& dsud)
{
    // actual Vulkan descriptor set update
    if (descriptorSet && descriptorSet->descriptorSet) {
        if ((uint32_t)bindingResources.bindings.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
            PLUGIN_ASSERT(false);
            return;
        }
        const auto& buffers = bindingResources.buffers;
        const auto& images = bindingResources.images;
        const auto& samplers = bindingResources.samplers;
        for (const auto& refBuf : buffers) {
            const auto& ref = refBuf.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
            if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
                for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                    // index 0 is the ref itself; from index 1 onwards the array offsets are used
                    const BindableBuffer& bRes =
                        (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
                    if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
                        const GpuAccelerationStructurePlatformDataVk& platAccel =
                            resPtr->GetPlatformDataAccelerationStructure();
                        wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
                            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
                            nullptr,                                                           // pNext
                            descriptorCount,                  // accelerationStructureCount
                            &platAccel.accelerationStructure, // pAccelerationStructures
                        };
                    }
                }
                // NOTE: pNext references only the info struct at dsud.accelIndex; for
                // descriptorCount > 1 the handles would need to be stored contiguously there
                wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
                    &wd.descriptorAccelInfos[dsud.accelIndex],    // pNext
                    descriptorSet->descriptorSet,                 // dstSet
                    ref.binding.binding,                          // dstBinding
                    0,                                            // dstArrayElement
                    descriptorCount,                              // descriptorCount
                    (VkDescriptorType)ref.binding.descriptorType, // descriptorType
                    nullptr,                                      // pImageInfo
                    nullptr,                                      // pBufferInfo
                    nullptr,                                      // pTexelBufferView
                };
                dsud.accelIndex += descriptorCount;
#endif
            } else {
                for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                    // index 0 is the ref itself; from index 1 onwards the array offsets are used
                    const BindableBuffer& bRes =
                        (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
                    const auto optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
                    if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
                        const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
                        // takes into account dynamic ring buffers with mapping
                        const auto bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
                        const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
                        const VkDeviceSize bufferRange =
                            Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
                                (VkDeviceSize)bRes.byteSize);
                        wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
                            platBuffer.buffer, // buffer
                            byteOffset,        // offset
                            bufferRange,       // range
                        };
                    }
                }
                wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
                    nullptr,                                      // pNext
                    descriptorSet->descriptorSet,                 // dstSet
                    ref.binding.binding,                          // dstBinding
                    0,                                            // dstArrayElement
                    descriptorCount,                              // descriptorCount
                    (VkDescriptorType)ref.binding.descriptorType, // descriptorType
                    nullptr,                                      // pImageInfo
                    &wd.descriptorBufferInfos[dsud.bufferIndex],  // pBufferInfo
                    nullptr,                                      // pTexelBufferView
                };
                dsud.bufferIndex += descriptorCount;
            }
        }
        for (const auto& refImg : images) {
            const auto& ref = refImg.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const auto descriptorType = (VkDescriptorType)ref.binding.descriptorType;
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // index 0 is the ref itself; from index 1 onwards the array offsets are used
                const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].desc.resource;
                if (const GpuImageVk* resPtr = gpuResourceMgr.GetImage<GpuImageVk>(bRes.handle); resPtr) {
                    VkSampler sampler = VK_NULL_HANDLE;
                    if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
                        const GpuSamplerVk* samplerPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
                        if (samplerPtr) {
                            sampler = samplerPtr->GetPlatformData().sampler;
                        }
                    }
                    const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
                    const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
                    VkImageView imageView = platImage.imageView;
                    if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
                        (bRes.layer < platImageViews.layerImageViews.size())) {
                        imageView = platImageViews.layerImageViews[bRes.layer];
                    } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
                        if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
                            (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
                            imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
                        } else if (bRes.mip < platImageViews.mipImageViews.size()) {
                            imageView = platImageViews.mipImageViews[bRes.mip];
                        }
                    }
                    wd.descriptorImageInfos[dsud.imageIndex + idx] = {
                        sampler,                         // sampler
                        imageView,                       // imageView
                        (VkImageLayout)bRes.imageLayout, // imageLayout
                    };
                }
            }
            wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,    // sType
                nullptr,                                   // pNext
                descriptorSet->descriptorSet,              // dstSet
                ref.binding.binding,                       // dstBinding
                0,                                         // dstArrayElement
                descriptorCount,                           // descriptorCount
                descriptorType,                            // descriptorType
                &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
                nullptr,                                   // pBufferInfo
                nullptr,                                   // pTexelBufferView
            };
            dsud.imageIndex += descriptorCount;
        }
        for (const auto& refSam : samplers) {
            const auto& ref = refSam.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // index 0 is the ref itself; from index 1 onwards the array offsets are used
                const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].desc.resource;
                if (const GpuSamplerVk* resPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
                    const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
                    wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
                        platSampler.sampler,      // sampler
                        VK_NULL_HANDLE,           // imageView
                        VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
                    };
                }
            }
            wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,        // sType
                nullptr,                                       // pNext
                descriptorSet->descriptorSet,                  // dstSet
                ref.binding.binding,                           // dstBinding
                0,                                             // dstArrayElement
                descriptorCount,                               // descriptorCount
                (VkDescriptorType)ref.binding.descriptorType,  // descriptorType
                &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
                nullptr,                                       // pBufferInfo
                nullptr,                                       // pTexelBufferView
            };
            dsud.samplerIndex += descriptorCount;
        }
#if (RENDER_PERF_ENABLED == 1)
        // count the actually updated descriptor sets, not the API calls
        if (stateCache) {
            stateCache->perfCounters.updateDescriptorSetCount++;
        }
#endif
    }
}
} // namespace

void RenderBackendVk::UpdateGlobalDescriptorSets()
{
    RENDER_CPU_PERF_SCOPE("UpdateGlobalDescriptorSets", "");

    auto& dsMgr = (DescriptorSetManagerVk&)device_.GetDescriptorSetManager();
    LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
    const auto& allDescSets = dsMgr.GetUpdateDescriptorSetHandles();
    const uint32_t upDescriptorSetCount =
        static_cast<uint32_t>(Math::min(allDescSets.size(), wd.writeDescriptorSets.size()));
    DescriptorSetUpdateDataStruct dsud;

    for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
        if (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET) {
            continue;
        }
        const RenderHandle descHandle = allDescSets[descIdx];
        // first update the GPU descriptor indices
        dsMgr.UpdateDescriptorSetGpuHandle(descHandle);

        const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
        const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);

        UpdateSingleDescriptorSet(gpuResourceMgr_, nullptr, descriptorSet, bindingResources, wd, dsud);

        // NOTE: should update perf counters
    }

    // flush all queued descriptor writes with a single API call
    if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        vkUpdateDescriptorSets(device,     // device
            dsud.writeBindIdx,             // descriptorWriteCount
            wd.writeDescriptorSets.data(), // pDescriptorWrites
            0,                             // descriptorCopyCount
            nullptr);                      // pDescriptorCopies
    }
}
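
// NOTE: Illustrative sketch, not part of the original file. The batching above
// follows the canonical Vulkan pattern: fill an array of VkWriteDescriptorSet
// entries and flush them with one vkUpdateDescriptorSets call. A minimal
// single-write version with hypothetical handles (device, set, buffer) would be:
//     const VkDescriptorBufferInfo bufferInfo { buffer, 0u, VK_WHOLE_SIZE };
//     const VkWriteDescriptorSet write {
//         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr,
//         set,                               // dstSet
//         0u,                                // dstBinding
//         0u,                                // dstArrayElement
//         1u,                                // descriptorCount
//         VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, // descriptorType
//         nullptr, &bufferInfo, nullptr,
//     };
//     vkUpdateDescriptorSets(device, 1u, &write, 0u, nullptr);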

void RenderBackendVk::UpdateCommandListDescriptorSets(
    const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
{
    auto& dsMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;

    const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
    const auto upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
    LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
    DescriptorSetUpdateDataStruct dsud;
    for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
        if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
            (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
            continue;
        }

        const RenderHandle descHandle = allDescSets[descIdx];
        // first update the GPU descriptor indices
        dsMgr.UpdateDescriptorSetGpuHandle(descHandle);

        const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
        const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);

        UpdateSingleDescriptorSet(gpuResourceMgr_, &stateCache, descriptorSet, bindingResources, wd, dsud);
    }
    // flush all queued descriptor writes with a single API call
    if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        vkUpdateDescriptorSets(device,     // device
            dsud.writeBindIdx,             // descriptorWriteCount
            wd.writeDescriptorSets.data(), // pDescriptorWrites
            0,                             // descriptorCopyCount
            nullptr);                      // pDescriptorCopies
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
{
    const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;

    PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
    const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
                                                      ? VK_PIPELINE_BIND_POINT_COMPUTE
                                                      : VK_PIPELINE_BIND_POINT_GRAPHICS;
    const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;

    bool valid = (pipelineLayout != VK_NULL_HANDLE);
    const uint32_t firstSet = renderCmd.firstSet;
    const uint32_t setCount = renderCmd.setCount;
    if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
        uint32_t dynamicOffsetDescriptorSetIndices = 0;
        uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;

        VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
        const uint32_t firstPlusCount = firstSet + setCount;
        for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
            const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
            if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
                const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
                dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;

                const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
                if (descriptorSet && descriptorSet->descriptorSet) {
                    descriptorSets[idx] = descriptorSet->descriptorSet;
                    // update, copy to state cache
                    PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
                    stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
                    const uint32_t currShift = (idx * 16u);
                    const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
                    uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
                    stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
                } else {
                    valid = false;
                }
            }
        }

        uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
                                PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
        uint32_t dynamicOffsetIdx = 0;
        // NOTE: optimize
        // this code has some safety checks that the offset is not updated for non-dynamic sets
        // it could be left on only for validation
        for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
            if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
                const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
                const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
                const auto dodResCount = static_cast<uint32_t>(dod.resources.size());
                const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
                for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
                    uint32_t byteOffset = 0U;
                    if (descriptorSetDynamicOffsets.dynamicOffsets &&
                        (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount)) {
                        byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
                    }
                    dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
                }
            }
        }

        stateCache.validBindings = valid;
        if (stateCache.validBindings) {
            if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
                vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
                    pipelineBindPoint,                        // pipelineBindPoint
                    pipelineLayout,                           // layout
                    firstSet,                                 // firstSet
                    setCount,                                 // descriptorSetCount
                    &descriptorSets[firstSet],                // pDescriptorSets
                    dynamicOffsetIdx,                         // dynamicOffsetCount
                    dynamicOffsets);                          // pDynamicOffsets
#if (RENDER_PERF_ENABLED == 1)
                stateCache.perfCounters.bindDescriptorSetCount++;
#endif
            } else {
                // possible pso re-creation and bind of these sets to the new pso
                const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
                    (PipelineBindPoint)pipelineBindPoint };
                RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
                RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
            }
        }
    }
}
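
// NOTE: Illustrative sketch, not part of the original file. Dynamic offsets are
// consumed in set/binding order by vkCmdBindDescriptorSets; a typical caller-side
// use is per-frame sub-allocation of one dynamic uniform buffer (hypothetical
// names: frameIndex, alignedPerFrameByteSize, descriptorSet):
//     const uint32_t dynamicOffset = frameIndex * alignedPerFrameByteSize;
//     vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
//         pipelineLayout, 0u, 1u, &descriptorSet, 1u, &dynamicOffset);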

void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
    PLUGIN_ASSERT(renderCmd.data);

    PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
    const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;

    const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0));
    PLUGIN_ASSERT(valid);

    if (valid) {
        const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
        vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
            pipelineLayout,                      // layout
            shaderStageFlags,                    // stageFlags
            0,                                   // offset
            renderCmd.pushConstant.byteSize,     // size
            static_cast<void*>(renderCmd.data)); // pValues
    }
}
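
// NOTE: Illustrative sketch, not part of the original file. The push constant
// written above must fit a VkPushConstantRange declared at pipeline layout
// creation, e.g. a hypothetical 16-byte constant for the vertex stage:
//     const VkPushConstantRange range { VK_SHADER_STAGE_VERTEX_BIT, 0u, 16u };
//     VkPipelineLayoutCreateInfo layoutInfo { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
//     layoutInfo.pushConstantRangeCount = 1u;
//     layoutInfo.pPushConstantRanges = &range;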

void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_RT_ENABLED == 1)
    // NOTE: missing
    const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.dstAccelerationStructure);
    const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
    if ((!dst) || (!scratchBuffer)) {
        return; // early out; both buffers are dereferenced below
    }
    const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
    const VkDevice device = devicePlat.device;

    const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
    const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;

    // scratch data with user offset
    const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
                                        VkDeviceSize(renderCmd.scratchOffset) };

    const size_t arraySize =
        renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
    vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
    vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);

    size_t arrayIndex = 0;
    for (const auto& trianglesRef : renderCmd.trianglesView) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
            {},                                                    // geometry
            0,                                                     // flags
        };
        uint32_t primitiveCount = 0;
        const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
        const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
        if (vb && ib) {
            const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
                device, vb->GetPlatformData().buffer) };
            const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
                device, ib->GetPlatformData().buffer) };
            VkDeviceOrHostAddressConstKHR transformData {};
            if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
                if (const GpuBufferVk* tr =
                        gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
                    tr) {
                    // use the transform buffer itself, not the index buffer
                    transformData.deviceAddress = GetBufferDeviceAddress(device, tr->GetPlatformData().buffer);
                }
            }
            primitiveCount = trianglesRef.info.indexCount / 3u; // triangles

            geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
            geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
                nullptr,                                                              // pNext
                VkFormat(trianglesRef.info.vertexFormat),                             // vertexFormat
                vertexData,                                                           // vertexData
                VkDeviceSize(trianglesRef.info.vertexStride),                         // vertexStride
                trianglesRef.info.maxVertex,                                          // maxVertex
                VkIndexType(trianglesRef.info.indexType),                             // indexType
                indexData,                                                            // indexData
                transformData,                                                        // transformData
            };
        }
        buildRangeInfos[arrayIndex] = {
            primitiveCount, // primitiveCount
            0u,             // primitiveOffset
            0u,             // firstVertex
            0u,             // transformOffset
        };
        arrayIndex++;
    }
    for (const auto& aabbsRef : renderCmd.aabbsView) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
            {},                                                    // geometry
            0,                                                     // flags
        };
        VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
        if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle); iPtr) {
            deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
        }
        geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
        geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
            nullptr,                                                          // pNext
            deviceAddress,                                                    // data
            aabbsRef.info.stride,                                             // stride
        };
        buildRangeInfos[arrayIndex] = {
            1u, // primitiveCount
            0u, // primitiveOffset
            0u, // firstVertex
            0u, // transformOffset
        };
        arrayIndex++;
    }
    for (const auto& instancesRef : renderCmd.instancesView) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
            {},                                                    // geometry
            0,                                                     // flags
        };
        VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
        if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle); iPtr) {
            deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
        }
        geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
        geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
            nullptr,                                                              // pNext
            instancesRef.info.arrayOfPointers,                                    // arrayOfPointers
            deviceAddress,                                                        // data
        };
        buildRangeInfos[arrayIndex] = {
            1u, // primitiveCount
            0u, // primitiveOffset
            0u, // firstVertex
            0u, // transformOffset
        };
        arrayIndex++;
    }

    const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
        nullptr,                                                          // pNext
        VkAccelerationStructureTypeKHR(renderCmd.type),                   // type
        VkBuildAccelerationStructureFlagsKHR(renderCmd.flags),            // flags
        VkBuildAccelerationStructureModeKHR(renderCmd.mode),              // mode
        VK_NULL_HANDLE,                                                   // srcAccelerationStructure
        dstAs,                                                            // dstAccelerationStructure
        uint32_t(arrayIndex),                                             // geometryCount
        geometryData.data(),                                              // pGeometries
        nullptr,                                                          // ppGeometries
        scratchData,                                                      // scratchData
    };

    vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
    for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
        buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
    }
    const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
    if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
        extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
            1u,                                                                // infoCount
            &buildGeometryInfo,                                                // pInfos
            buildRangeInfosPtr.data());                                        // ppBuildRangeInfos
    }
#endif
}
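
// NOTE: Illustrative sketch, not part of the original file. The destination and
// scratch buffers consumed above are conventionally sized up front with
// vkGetAccelerationStructureBuildSizesKHR; whether the loader-resolved pointer
// is exposed through DeviceVk::ExtFunctions is an assumption here, and
// buildInfo/maxPrimitiveCounts are hypothetical variables:
//     VkAccelerationStructureBuildSizesInfoKHR sizeInfo {
//         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR };
//     vkGetAccelerationStructureBuildSizesKHR(device,
//         VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo,
//         &maxPrimitiveCounts, &sizeInfo);
//     // sizeInfo.accelerationStructureSize -> AS buffer size,
//     // sizeInfo.buildScratchSize          -> scratch buffer size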

void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
    // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR, but that is not supported at the moment
    const auto imageLayout = (VkImageLayout)renderCmd.imageLayout;
    PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
    if (imagePtr) {
        const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
        if (platImage.image) {
            VkClearColorValue clearColor;
            PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
            CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));

            // NOTE: a temporary vector is allocated because the range count has no fixed upper limit
            vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
            for (size_t idx = 0; idx < ranges.size(); ++idx) {
                const auto& inputRef = renderCmd.ranges[idx];
                ranges[idx] = {
                    (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
                    inputRef.baseMipLevel,                         // baseMipLevel
                    inputRef.levelCount,                           // levelCount
                    inputRef.baseArrayLayer,                       // baseArrayLayer
                    inputRef.layerCount,                           // layerCount
                };
            }

            vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
                platImage.image,                       // image
                imageLayout,                           // imageLayout
                &clearColor,                           // pColor
                static_cast<uint32_t>(ranges.size()),  // rangeCount
                ranges.data());                        // pRanges
        }
    }
}
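
// NOTE: Illustrative sketch, not part of the original file. A typical subresource
// range clearing the first mip of every array layer would look like:
//     const VkImageSubresourceRange range {
//         VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, VK_REMAINING_ARRAY_LAYERS };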

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ViewportDesc& vd = renderCmd.viewportDesc;

    VkViewport vp {
        vd.x,        // x
        vd.y,        // y
        vd.width,    // width
        vd.height,   // height
        vd.minDepth, // minDepth
        vd.maxDepth, // maxDepth
    };
    // handle viewport for surface transform
    const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
    if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
        if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
            vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.height - vd.y;
            vp.y = vd.x;
            vp.width = vd.height;
            vp.height = vd.width;
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
            vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.width - vd.x;
            vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.height - vd.y;
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
            vp.x = vd.y;
            vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.width - vd.x;
            vp.width = vd.height;
            vp.height = vd.width;
        }
    }

    vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
        0,                                 // firstViewport
        1,                                 // viewportCount
        &vp);                              // pViewports
}
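
// NOTE: Illustrative worked example, not part of the original file. For a
// hypothetical 1080x1920 pre-rotated framebuffer and a client viewport
// (x=100, y=50, w=300, h=200), the 90 degree branch above yields:
//     vp.x = 1080 - 200 - 50 = 830, vp.y = 100, vp.width = 200, vp.height = 300
// i.e. the rectangle is rotated into the swapchain's native orientation.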

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ScissorDesc& sd = renderCmd.scissorDesc;

    VkRect2D sc {
        { sd.offsetX, sd.offsetY },          // offset
        { sd.extentWidth, sd.extentHeight }, // extent
    };
    // handle scissor for surface transform
    const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
    if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
        if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
            sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.height - sc.offset.y, sc.offset.x },
                { sc.extent.height, sc.extent.width } };
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
            sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.width - sc.offset.x,
                       (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.height - sc.offset.y },
                { sc.extent.width, sc.extent.height } };
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
            sc = { { sc.offset.y, (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.width - sc.offset.x },
                { sc.extent.height, sc.extent.width } };
        }
    }

    vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
        0,                                // firstScissor
        1,                                // scissorCount
        &sc);                             // pScissors
}
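
// NOTE: Illustrative worked example, not part of the original file. With the
// same hypothetical 1080x1920 framebuffer and a scissor (offset 100,50,
// extent 300x200), the 90 degree branch maps it to
//     offset (1080 - 200 - 50, 100) = (830, 100), extent 200x300
// which matches the rotated viewport in the example above.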

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.lineWidth);               // lineWidth
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.depthBiasConstantFactor,  // depthBiasConstantFactor
        renderCmd.depthBiasClamp,           // depthBiasClamp
        renderCmd.depthBiasSlopeFactor);    // depthBiasSlopeFactor
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.blendConstants);               // blendConstants[4]
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.minDepthBounds,             // minDepthBounds
        renderCmd.maxDepthBounds);            // maxDepthBounds
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const auto stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;

    if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
        vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask,                             // faceMask
            renderCmd.mask);                             // compareMask
    } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
        vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask,                           // faceMask
            renderCmd.mask);                           // writeMask
    } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
        vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask,                           // faceMask
            renderCmd.mask);                           // reference
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
    if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
        const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
        const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
        };

        extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
            &fragmentSize,                                                // pFragmentSize
            combinerOps);                                                 // combinerOps
    }
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (stateCache.backendNode) {
        const RenderBackendRecordingStateVk recordingState = {
            {},
            cmdBuf.commandBuffer,                              // commandBuffer
            stateCache.lowLevelRenderPassData.renderPass,      // renderPass
            stateCache.lowLevelRenderPassData.framebuffer,     // framebuffer
            stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
            stateCache.lowLevelRenderPassData.subpassIndex,    // subpassIndex
            stateCache.pipelineLayout,                         // pipelineLayout
        };
        const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
        stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    // NOTE: stub; no query pool is wired up yet, so the calls below are invalid
    // if reached with queryPool == VK_NULL_HANDLE
    PLUGIN_ASSERT_MSG(false, "not implemented");

    const auto pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
    const uint32_t queryIndex = renderCmd.queryIndex;
    VkQueryPool queryPool = VK_NULL_HANDLE;

    vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
        queryPool,                            // queryPool
        queryIndex,                           // firstQuery
        1);                                   // queryCount

    vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
        pipelineStageFlagBits,                // pipelineStage
        queryPool,                            // queryPool
        queryIndex);                          // query
}
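
// NOTE: Illustrative sketch, not part of the original file. A complete timestamp
// query round trip with a real pool would look roughly like this (hypothetical
// handles device, queryPool, commandBuffer; limits = VkPhysicalDeviceLimits):
//     vkCmdResetQueryPool(commandBuffer, queryPool, 0u, 2u);
//     vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, queryPool, 0u);
//     // ... recorded work to be measured ...
//     vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool, 1u);
//     uint64_t ticks[2u] {};
//     vkGetQueryPoolResults(device, queryPool, 0u, 2u, sizeof(ticks), ticks,
//         sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
//     // timestampPeriod is nanoseconds per tick
//     const double micros = double(ticks[1] - ticks[0]) * limits.timestampPeriod / 1000.0;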

void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
{
    for (auto& presRef : presentationData_.infos) {
        if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
            continue;
        }

        PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);

        const GpuResourceState& state = presRef.renderGraphProcessedState;
        const auto srcAccessMask = (VkAccessFlags)state.accessFlags;
        const auto dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
        const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
                                                  (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
        const auto oldLayout = (VkImageLayout)presRef.imageLayout;
        const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
        // NOTE: queue is not currently checked (should be in the same queue as last time used)
        constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
        constexpr VkImageSubresourceRange imageSubresourceRange {
            VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
            0,                                                // baseMipLevel
            1,                                                // levelCount
            0,                                                // baseArrayLayer
            1,                                                // layerCount
        };

        const VkImageMemoryBarrier imageMemoryBarrier {
            VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
            nullptr,                                // pNext
            srcAccessMask,                          // srcAccessMask
            dstAccessMask,                          // dstAccessMask
            oldLayout,                              // oldLayout
            newLayout,                              // newLayout
            srcQueueFamilyIndex,                    // srcQueueFamilyIndex
            dstQueueFamilyIndex,                    // dstQueueFamilyIndex
            presRef.swapchainImage,                 // image
            imageSubresourceRange,                  // subresourceRange
        };

        vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
            srcStageMask,                          // srcStageMask
            dstStageMask,                          // dstStageMask
            dependencyFlags,                       // dependencyFlags
            0,                                     // memoryBarrierCount
            nullptr,                               // pMemoryBarriers
            0,                                     // bufferMemoryBarrierCount
            nullptr,                               // pBufferMemoryBarriers
            1,                                     // imageMemoryBarrierCount
            &imageMemoryBarrier);                  // pImageMemoryBarriers

        presRef.presentationLayoutChangeNeeded = false;
        presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
    }
}
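
// NOTE: Illustrative sketch, not part of the original file. The barrier above
// transitions the swapchain image to PRESENT_SRC_KHR; the frame is then handed
// to the presentation engine roughly as follows (hypothetical handles
// renderingDoneSemaphore, swapchain, acquiredImageIndex, presentQueue):
//     VkPresentInfoKHR presentInfo { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
//     presentInfo.waitSemaphoreCount = 1u;
//     presentInfo.pWaitSemaphores = &renderingDoneSemaphore;
//     presentInfo.swapchainCount = 1u;
//     presentInfo.pSwapchains = &swapchain;
//     presentInfo.pImageIndices = &acquiredImageIndex;
//     vkQueuePresentKHR(presentQueue, &presentInfo);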

#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
void RenderBackendVk::BeginDebugMarker(
    const LowLevelCommandBufferVk& cmdBuf, const BASE_NS::string_view name, const Math::Vec4 color)
{
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
        const VkDebugUtilsLabelEXT label {
            VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
            nullptr,                                 // pNext
            name.data(),                             // pLabelName
            { color.x, color.y, color.z, color.w }   // color[4]
        };
        deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuf.commandBuffer, &label);
    }
}

void RenderBackendVk::EndDebugMarker(const LowLevelCommandBufferVk& cmdBuf)
{
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
        deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuf.commandBuffer);
    }
}
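
// NOTE: Illustrative sketch, not part of the original file. The helpers above
// are intended to be used as a matched pair so tools such as RenderDoc can
// group the commands recorded in between:
//     BeginDebugMarker(cmdBuf, "ShadowPass", { 1.0f, 0.0f, 0.0f, 1.0f });
//     // ... record shadow pass commands ...
//     EndDebugMarker(cmdBuf);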
#endif

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
void RenderBackendVk::RenderCommand(const RenderCommandBeginDebugMarker& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    BeginDebugMarker(cmdBuf, renderCmd.name, renderCmd.color);
}

void RenderBackendVk::RenderCommand(const RenderCommandEndDebugMarker& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    EndDebugMarker(cmdBuf);
}
#endif

#if (RENDER_PERF_ENABLED == 1)

void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
            constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
            perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
    perfGpuTimerData_.currentOffset =
        (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
#endif
}
2777 
EndFrameTimers()2778 void RenderBackendVk::EndFrameTimers()
2779 {
2780     int64_t fullGpuTime = 0;
2781 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2782     // already in micros
2783     fullGpuTime = perfGpuTimerData_.fullGpuCounter;
2784     perfGpuTimerData_.fullGpuCounter = 0;
2785 
2786     perfGpuTimerData_.gpuBuffer->Unmap();
2787 #endif
2788     if (IPerformanceDataManagerFactory* globalPerfData =
2789             GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2790         globalPerfData) {
2791         IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
2792         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2793         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2794         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2795         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2796         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2797         perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2798 
2799         CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
2800         CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
2801         CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
2802         CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
2803         CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
2804         CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
2805     }
    // accumulate per-timer counters into combined frame totals for tracing
    PerfCounters counters;
    for (auto& timer : timers_) {
        CopyPerfCounters(timer.second.perfCounters, counters);
        timer.second.perfCounters = {}; // reset perf counters for the next frame
    }

    CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(counters.drawCount));
    CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(counters.drawIndirectCount));
    CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(counters.dispatchCount));
    CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(counters.dispatchIndirectCount));
    CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(counters.renderPassCount));
    CORE_PROFILER_PLOT("Bind pipeline count", static_cast<int64_t>(counters.bindPipelineCount));
    CORE_PROFILER_PLOT("Bind descriptor set count", static_cast<int64_t>(counters.bindDescriptorSetCount));
    CORE_PROFILER_PLOT("Update descriptor set count", static_cast<int64_t>(counters.updateDescriptorSetCount));
    CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(counters.instanceCount));
    CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(counters.triangleCount));
}

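// Note: this must be recorded outside a render pass instance (the Vulkan spec
// disallows vkCmdResetQueryPool inside one), which is why the
// secondary-command-buffer path returns early below.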
void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
    const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (stateCache.secondaryCommandBuffer) {
        return; // cannot be called inside a render pass (e.g. with secondary command buffers)
    }
    PLUGIN_ASSERT(timers_.count(name) == 1);
    const PerfDataSet* perfDataSet = &timers_[name];
    if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
        const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
        if (platData.queryPool) {
            vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
                platData.queryPool,                   // queryPool
                queryIndex,                           // firstQuery
                1);                                   // queryCount

            vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
                stageFlagBits,                        // pipelineStage
                platData.queryPool,                   // queryPool
                queryIndex);                          // query
        }
    }
#endif
}

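// Usage note: each timer records two timestamps per frame (query indices 0 and
// 1, a start/end pair matching TIME_STAMP_PER_GPU_QUERY), and CopyPerfTimeStamp
// below copies both results with a single vkCmdCopyQueryPoolResults call
// (queryCount == 2).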
namespace {
void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
{
    perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
    perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
    perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
    perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
    perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
    perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
    perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
    perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
    perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
    perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
}
} // namespace

void RenderBackendVk::CopyPerfTimeStamp(
    const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
{
    PLUGIN_ASSERT(timers_.count(name) == 1);
    PerfDataSet* const perfDataSet = &timers_[name];

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // read back the results written by earlier frames' queries on the CPU,
    // then record a copy of this frame's query results into the GPU buffer
    const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
    int64_t gpuMicroSeconds = 0;
    {
        auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
        auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);

        const uint64_t startStamp = *currentData;
        const uint64_t endStamp = *(currentData + 1);

        // timestampPeriod is the number of nanoseconds per timestamp tick
        const double timestampPeriod =
            static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                                    .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
        constexpr int64_t nanosToMicrosDivisor { 1000 };
        gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
        // discard clearly bogus values (e.g. not-yet-written or wrapped stamps)
        constexpr int64_t maxValidMicroSecondValue { 4294967295 };
        if (gpuMicroSeconds > maxValidMicroSecondValue) {
            gpuMicroSeconds = 0;
        }
        perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();

    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);

        // cannot be called inside a render pass (e.g. with secondary command buffers)
        if (!stateCache.secondaryCommandBuffer) {
            if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
                const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());

                const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
                PLUGIN_ASSERT(gpuBuffer);
                const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();

                constexpr uint32_t queryCount = 2;
                constexpr VkDeviceSize queryStride = sizeof(uint64_t);
                constexpr VkQueryResultFlags queryResultFlags =
                    VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;

                if (platData.queryPool) {
                    vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
                        platData.queryPool,                         // queryPool
                        0,                                          // firstQuery
                        queryCount,                                 // queryCount
                        platBuffer.buffer,                          // dstBuffer
                        currentFrameByteOffset,                     // dstOffset
                        queryStride,                                // stride
                        queryResultFlags);                          // flags
                }
            }
        }
#endif
        UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
    }
}

#endif
RENDER_END_NAMESPACE()