/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_vk.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vulkan/vulkan_core.h>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/containers/string_view.h>
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#include <core/plugin/intf_class_register.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/device/pipeline_state_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_backend_node.h>
#include <render/vulkan/intf_device_vk.h>

#include "perf/cpu_perf_scope.h"
#if (RENDER_PERF_ENABLED == 1)
#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#include "vulkan/gpu_query_vk.h"
#endif

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pool_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "render_backend.h"
#include "util/log.h"
#include "util/render_frame_util.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/gpu_semaphore_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

using CORE_NS::GetInstance;
using CORE_NS::IParallelTaskQueue;
using CORE_NS::IPerformanceDataManager;
using CORE_NS::IPerformanceDataManagerFactory;
using CORE_NS::ITaskQueueFactory;
using CORE_NS::IThreadPool;

RENDER_BEGIN_NAMESPACE()
namespace {
#if (RENDER_PERF_ENABLED == 1)
void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
{
    dst.drawCount += src.drawCount;
    dst.drawIndirectCount += src.drawIndirectCount;
    dst.dispatchCount += src.dispatchCount;
    dst.dispatchIndirectCount += src.dispatchIndirectCount;
    dst.bindPipelineCount += src.bindPipelineCount;
    dst.renderPassCount += src.renderPassCount;
    dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
    dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
    dst.triangleCount += src.triangleCount;
    dst.instanceCount += src.instanceCount;
}
#endif

inline void ProcessBackendPositionCommands(IDevice& device, const RenderBackendCommandPosition position,
    const array_view<const ProcessBackendCommand> commands)
{
    for (const auto& ref : commands) {
        if ((position == ref.backendCommandPosition) && ref.command) {
            ref.command->ExecuteBackendCommand(device);
        }
    }
}
} // namespace

// Helper class for running std::function as a ThreadPool task.
class FunctionTask final : public IThreadPool::ITask {
public:
    static Ptr Create(std::function<void()> func)
    {
        return Ptr { new FunctionTask(BASE_NS::move(func)) };
    }

    explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    std::function<void()> func_;
};
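// Usage sketch (matching the submissions later in this file):
//   queue_->Submit(taskId, FunctionTask::Create([this]() { /* record work */ }));
// Destroy() is kept protected so that the task is released through the ITask
// interface; Ptr is expected to call Destroy() rather than the destructor.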

#if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
namespace {
static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
}
#endif

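// The constructor below sizes the GPU timestamp readback buffer as
// maxQueryObjectCount (512) * TIME_STAMP_PER_GPU_QUERY (2) * sizeof(uint64_t)
// bytes per frame, multiplied by the device's command buffering count so that
// every frame in flight owns its own slice of the buffer.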
RenderBackendVk::RenderBackendVk(Device& dev, GpuResourceManager& gpuResourceManager, CORE_NS::ITaskQueue* const queue)
    : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
      queue_(queue)
{
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();

    constexpr uint32_t maxQueryObjectCount { 512u };
    constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
    const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
    const GpuBufferDesc desc {
        BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT,                        // usageFlags
        CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
        0,                                                                              // engineCreationFlags
        fullByteSize,                                                                   // byteSize
    };
    perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
    perfGpuTimerData_.currentOffset = 0;
    perfGpuTimerData_.frameByteSize = byteSize;
    perfGpuTimerData_.fullByteSize = fullByteSize;
    { // zero initialize
        uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
        memset_s(bufferData, fullByteSize, 0, fullByteSize);
        perfGpuTimerData_.gpuBuffer->Unmap();
    }
#endif
#endif
}

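// Acquires the next image for every active swapchain and remaps the engine's
// remappable backbuffer handle to the acquired image. A successful (or
// suboptimal) acquire marks the PresentationInfo as valid; fatal results log
// an error and may invalidate the device.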
void RenderBackendVk::AcquirePresentationInfo(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    RENDER_CPU_PERF_SCOPE("AcquirePresentationInfo", "");
    if (device_.HasSwapchain()) {
        presentationData_.present = true;
        // resized to the same size for convenience
        presentationData_.infos.resize(backBufferConfig.swapchainData.size());
        for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
            const auto& swapData = backBufferConfig.swapchainData[swapIdx];
            PresentationInfo pi;
            const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;

            if (const auto* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
                swapchain) {
                const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
                const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
                const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
                PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
                pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
                pi.swapchain = platSwapchain.swapchain;
                pi.useSwapchain = true;
                // NOTE: for legacy default backbuffer reasons the same swapchain might appear multiple times ATM
                for (const auto& piRef : presentationData_.infos) {
                    if (piRef.swapchain == pi.swapchain) {
                        pi.useSwapchain = false;
                    }
                }
                // NOTE: do not re-acquire the default backbuffer swapchain if it's in use with a different handle
                if (pi.useSwapchain) {
                    const VkResult result = vkAcquireNextImageKHR(device, // device
                        vkSwapchain,                                      // swapchain
                        UINT64_MAX,                                       // timeout
                        pi.swapchainSemaphore,                            // semaphore
                        (VkFence) nullptr,                                // fence
                        &pi.swapchainImageIndex);                         // pImageIndex

                    switch (result) {
                        // Success
                        case VK_SUCCESS:
                        case VK_TIMEOUT:
                        case VK_NOT_READY:
                        case VK_SUBOPTIMAL_KHR:
                            pi.validAcquire = true;
                            break;

                        // Failure
                        case VK_ERROR_OUT_OF_HOST_MEMORY:
                        case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
                            return;
                        case VK_ERROR_DEVICE_LOST:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
                            return;
                        case VK_ERROR_OUT_OF_DATE_KHR:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
                            return;
                        case VK_ERROR_SURFACE_LOST_KHR:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
                            return;

                        case VK_EVENT_SET:
                        case VK_EVENT_RESET:
                        case VK_INCOMPLETE:
                        case VK_ERROR_INITIALIZATION_FAILED:
                        case VK_ERROR_MEMORY_MAP_FAILED:
                        case VK_ERROR_LAYER_NOT_PRESENT:
                        case VK_ERROR_EXTENSION_NOT_PRESENT:
                        case VK_ERROR_FEATURE_NOT_PRESENT:
                        case VK_ERROR_INCOMPATIBLE_DRIVER:
                        case VK_ERROR_TOO_MANY_OBJECTS:
                        case VK_ERROR_FORMAT_NOT_SUPPORTED:
                        case VK_ERROR_FRAGMENTED_POOL:
                        case VK_ERROR_OUT_OF_POOL_MEMORY:
                        case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                        case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                        case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                        case VK_ERROR_VALIDATION_FAILED_EXT:
                        case VK_ERROR_INVALID_SHADER_NV:
                        // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
                        case VK_ERROR_FRAGMENTATION_EXT:
                        case VK_ERROR_NOT_PERMITTED_EXT:
                        // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
                        case VK_RESULT_MAX_ENUM:
                        default:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR unknown result. Device invalidated");
                            PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
                            device_.SetDeviceStatus(false);
                            break;
                    }

                    if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
                        PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
                            static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
                    }

                    const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                    const RenderHandle handle = swapchainData.remappableSwapchainImage;
                    if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
                        // remap image to backbuffer
                        const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
                        // special swapchain remapping
                        gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
                    }
                    pi.renderGraphProcessedState = swapData.backBufferState;
                    pi.imageLayout = swapData.layout;
                    if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
                        pi.presentationLayoutChangeNeeded = true;
                        pi.renderNodeCommandListIndex =
                            static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);

                        if (const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle); swapImage) {
                            pi.swapchainImage = swapImage->GetPlatformData().image;
                        }
                    }
                }
            }
            presentationData_.infos[swapIdx] = pi;
        }
    }
}

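// Presents all validly acquired swapchain images with a single
// vkQueuePresentKHR call, waiting on the semaphore signaled by the last
// submitted command buffer. Backend commands registered for
// BEFORE_PRESENTATION / AFTER_PRESENTATION run around the present.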
void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!queue_) {
        return;
    }
    // before presentation commands
    ProcessBackendPositionCommands(device_, RenderBackendCommandPosition::BEFORE_PRESENTATION, processBackendCommands_);

    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain() && presentationData_.present) {
            PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
            uint32_t swapchainCount = 0U;
            VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
                VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
            uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
            for (const auto& presRef : presentationData_.infos) {
                // NOTE: the default backbuffer might be presented multiple times;
                // the flag useSwapchain should be false in these cases
                if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
                    PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
                    vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
                    vkSwapchains[swapchainCount++] = presRef.swapchain;
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif

            // NOTE: currently waits for the last valid submission semaphore (backtraces here for a valid
            // semaphore)
            if (swapchainCount > 0U) {
                VkSemaphore waitSemaphore = VK_NULL_HANDLE;
                uint32_t waitSemaphoreCount = 0;
                if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
                    waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
                    waitSemaphoreCount = 1;
                }

                const VkPresentInfoKHR presentInfo {
                    VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
                    nullptr,                            // pNext
                    waitSemaphoreCount,                 // waitSemaphoreCount
                    &waitSemaphore,                     // pWaitSemaphores
                    swapchainCount,                     // swapchainCount
                    vkSwapchains,                       // pSwapchains
                    vkSwapImageIndices,                 // pImageIndices
                    nullptr                             // pResults
                };

                const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
                const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
                    &presentInfo);                                             // pPresentInfo

                switch (result) {
                    // Success
                    case VK_SUCCESS:
                        break;
                    case VK_SUBOPTIMAL_KHR:
#if (RENDER_VALIDATION_ENABLED == 1)
                        PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
#endif
                        break;

                    // Failure
                    case VK_ERROR_OUT_OF_HOST_MEMORY:
                    case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                        PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
                        return;
                    case VK_ERROR_DEVICE_LOST:
                        PLUGIN_LOG_E("vkQueuePresentKHR device lost");
                        return;
                    case VK_ERROR_OUT_OF_DATE_KHR:
                        PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
                        return;
                    case VK_ERROR_SURFACE_LOST_KHR:
                        PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
                        return;

                    case VK_NOT_READY:
                    case VK_TIMEOUT:
                    case VK_EVENT_SET:
                    case VK_EVENT_RESET:
                    case VK_INCOMPLETE:
                    case VK_ERROR_INITIALIZATION_FAILED:
                    case VK_ERROR_MEMORY_MAP_FAILED:
                    case VK_ERROR_LAYER_NOT_PRESENT:
                    case VK_ERROR_EXTENSION_NOT_PRESENT:
                    case VK_ERROR_FEATURE_NOT_PRESENT:
                    case VK_ERROR_INCOMPATIBLE_DRIVER:
                    case VK_ERROR_TOO_MANY_OBJECTS:
                    case VK_ERROR_FORMAT_NOT_SUPPORTED:
                    case VK_ERROR_FRAGMENTED_POOL:
                    case VK_ERROR_OUT_OF_POOL_MEMORY:
                    case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                    case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                    case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                    case VK_ERROR_VALIDATION_FAILED_EXT:
                    case VK_ERROR_INVALID_SHADER_NV:
                    case VK_ERROR_FRAGMENTATION_EXT:
                    case VK_ERROR_NOT_PERMITTED_EXT:
                    case VK_RESULT_MAX_ENUM:
                    default:
                        PLUGIN_LOG_E("vkQueuePresentKHR unknown result");
                        PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
                        break;
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        } else {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
#endif
        }
    }

    // after presentation backend commands
    ProcessBackendPositionCommands(device_, RenderBackendCommandPosition::AFTER_PRESENTATION, processBackendCommands_);

    // clear after presentation (also cleared at the start of the render backend)
    processBackendCommands_.clear();
}

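// Frame entry point for the backend: resets per-frame submission state, begins
// the descriptor set manager's backend frame, records all command lists (the
// swapchain acquire runs as a parallel task), and then submits the recorded
// command buffers.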
void RenderBackendVk::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!queue_) {
        return;
    }

    // NOTE: all command lists are validated before entering here
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif

    // clear backend commands
    processBackendCommands_.clear();

    commandBufferSubmitter_ = {};
    commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());

    presentationData_.present = false;
    presentationData_.infos.clear();

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif

    // global begin backend frame
    auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
    descriptorSetMgr.BeginBackendFrame();

    // command list process loop/execute
    // first tries to acquire the swapchain in a task if needed
    RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
    commonCpuTimers_.submit.Begin();
#endif

    PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
    // submit vulkan command buffers
    // checks that presentation info has a valid acquire
    RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.submit.End();
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

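// Submission pass: submits each recorded command buffer to its GPU queue with
// the proper wait semaphores (other nodes, swapchain acquire) and signal
// semaphores (presentation, external GPU signals). The frame fence is attached
// to the last valid submission.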
void RenderBackendVk::RenderProcessSubmitCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: currently backtraces to the final valid command buffer semaphore
    uint32_t finalCommandBufferSubmissionIndex = ~0u;
    commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
    bool swapchainSemaphoreWaited = false;
    for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
         --cmdBufferIdx) {
        if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
            (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
                VK_NULL_HANDLE)) {
            finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
            break;
        }
    }

    for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
        const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
        if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
            continue;
        }

        const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];

        uint32_t waitSemaphoreCount = 0u;
        constexpr const uint32_t maxWaitSemaphoreCount =
            PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
        VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
        VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
        for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
            const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
            PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());

            VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
            if (waitSemaphore != VK_NULL_HANDLE) {
                waitSemaphores[waitSemaphoreCount] = waitSemaphore;
                waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                waitSemaphoreCount++;
            }
        }

        if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
            (!presentationData_.infos.empty())) {
            swapchainSemaphoreWaited = true;
            // go through all swapchain semaphores
            for (const auto& presRef : presentationData_.infos) {
                if (presRef.swapchainSemaphore) {
                    waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
                    waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
                    waitSemaphoreCount++;
                }
            }
        }

        uint32_t signalSemaphoreCount = 0u;
        PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
        constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
        VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
            VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
        VkFence fence = VK_NULL_HANDLE;
        if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
            // add fence signaling to the last submission for frame sync
            if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
                fence = frameSync->GetFrameFence().fence;
                frameSync->FrameFenceIsSignalled();
            }
            // signal external semaphores
            if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
                auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
                const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
                PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
                if (externalSignals.size() == externalSemaphores.size()) {
                    for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                        // needs to be false
                        if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                            if (const auto* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get(); gs) {
                                semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
                                externalSignals[sigIdx].signaled = true;
                            }
                        }
                    }
                }
            }

            if (presentationData_.present) {
                commandBufferSubmitter_.presentationWaitSemaphore =
                    commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
                semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
            }
            // add additional semaphores
            for (const auto& swapRef : backBufferConfig.swapchainData) {
                // should have been checked in the render graph already
                if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
                    semaphores[signalSemaphoreCount++] =
                        VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
                }
            }
        } else if (renderContextRef.submitDepencies.signalSemaphore) {
            semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
        }
        PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);

        const VkSubmitInfo submitInfo {
            VK_STRUCTURE_TYPE_SUBMIT_INFO,                        // sType
            nullptr,                                              // pNext
            waitSemaphoreCount,                                   // waitSemaphoreCount
            (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
            waitSemaphorePipelineStageFlags,                      // pWaitDstStageMask
            1,                                                    // commandBufferCount
            &cmdSubmitterRef.commandBuffer,                       // pCommandBuffers
            signalSemaphoreCount,                                 // signalSemaphoreCount
            (signalSemaphoreCount == 0) ? nullptr : semaphores,   // pSignalSemaphores
        };

        const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
        if (queue) {
            RENDER_CPU_PERF_SCOPE("vkQueueSubmit", "");
            VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
                1,                                  // submitCount
                &submitInfo,                        // pSubmits
                fence));                            // fence
        }
    }
}

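// Recording pass: schedules one task per (multi-)render command list on the
// task queue. Lists rendering to the swapchain wait for the acquire task, and
// lists with global descriptor set bindings wait for the descriptor set update
// task. Render passes using secondary command buffers record those in parallel
// and are then stitched together by RenderPrimaryRenderPass.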
void RenderBackendVk::RenderProcessCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // queue checked in upper level

    const auto cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
    constexpr uint64_t acquireTaskId { 0xFFFFffff0 };
    constexpr uint64_t globalDescSetTaskId { 0xFFFFffff1 };
    bool acquireSubmitted { false };
    bool globalDescSetSubmitted { false };
    vector<uint64_t> afterIdentifiers;
    afterIdentifiers.reserve(2U); // global descriptor sets, and swapchain acquire wait
    // submit global descset task if needed
    {
        auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
        const auto& allDescSets = descriptorSetMgr.GetUpdateDescriptorSetHandles();
        if (!allDescSets.empty()) {
            globalDescSetSubmitted = true;
            queue_->Submit(globalDescSetTaskId, FunctionTask::Create([this]() { UpdateGlobalDescriptorSets(); }));
        }
    }
    // submit acquire task if needed
    if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
        acquireSubmitted = true;
        queue_->Submit(acquireTaskId, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
            AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
        }));
    }
    uint64_t secondaryIdx = cmdBufferCount;
    for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
        afterIdentifiers.clear();
        // add wait for acquire if needed
        if (acquireSubmitted && (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx)) {
            afterIdentifiers.push_back(acquireTaskId);
        }
        // NOTE: idx increase
        const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
        const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
        PLUGIN_ASSERT(mrpData.subpassCount > 0);
        const uint32_t rcCount = mrpData.subpassCount;
        if (rcCount > (cmdBufferCount - cmdBufferIdx)) {
            PLUGIN_LOG_E("Invalid render command context");
            break; // NOTE: continue here would not advance cmdBufferIdx and would loop forever
        }

        // add backend position commands
        const auto& backendCommands = ref.renderCommandList->GetProcessBackendCommands();
        processBackendCommands_.append(backendCommands.begin(), backendCommands.end());

        // add wait for global descriptor sets if needed
        // always add a safety wait for secondary command lists (NOTE: needs to be further optimized)
        bool hasGlobalDescriptorSetBindings = false;
        if (globalDescSetSubmitted) {
            auto first = renderCommandFrameData.renderCommandContexts.cbegin() + cmdBufferIdx;
            auto last = first + rcCount;
            hasGlobalDescriptorSetBindings = std::any_of(first, last, [](const RenderCommandContext& ref) {
                return ref.renderCommandList->HasGlobalDescriptorSetBindings();
            });
        }
        if (globalDescSetSubmitted && (mrpData.secondaryCmdLists || hasGlobalDescriptorSetBindings)) {
            afterIdentifiers.push_back(globalDescSetTaskId);
        }
        if (mrpData.secondaryCmdLists) {
            afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
            for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
                const uint64_t submitId = secondaryIdx++;
                afterIdentifiers.push_back(submitId);
                queue_->SubmitAfter(afterIdentifiers, submitId,
                    FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
                        const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
                        MultiRenderCommandListDesc mrcDesc;
                        mrcDesc.multiRenderCommandListCount = 1u;
                        mrcDesc.baseContext = nullptr;
                        mrcDesc.secondaryCommandBuffer = true;
                        RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
                        const DebugNames debugNames { ref2.debugName,
                            renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
                        RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
                    }));
            }
            queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
                cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                    MultiRenderCommandListDesc mrcDesc;
                    mrcDesc.multiRenderCommandListCount = rcCount;
                    RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
                    const DebugNames debugNames { ref2.debugName, ref2.debugName };
                    RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
                }));
        } else {
            queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
                cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                    MultiRenderCommandListDesc mrcDesc;
                    mrcDesc.multiRenderCommandListCount = rcCount;
                    if (rcCount > 1) {
                        mrcDesc.multiRenderNodeCmdList = true;
                        mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
                    }
                    for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                        const uint32_t currIdx = cmdBufferIdx + rcIdx;
                        mrcDesc.multiRenderCommandListIndex = rcIdx;
                        RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                        const DebugNames debugNames { ref2.debugName,
                            renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                        RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
                    }
                }));
        }
        // idx increase
        cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
    }

    // process before-acquire commands here
    ProcessBackendPositionCommands(device_, RenderBackendCommandPosition::BEFORE_ACQUIRE, processBackendCommands_);

    // execute and wait for completion
    queue_->Execute();
    queue_->Clear();
}

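// Records the primary command buffer for a render pass whose subpasses were
// recorded into secondary command buffers: emits the barrier point, begins the
// render pass with SECONDARY_COMMAND_LISTS contents, executes each secondary
// command buffer, and ends the render pass.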
void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
    RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
{
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;

    const ContextCommandPoolVk& ptrCmdPool =
        (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;

    // begin cmd buffer
    const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
    constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
    const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
    if (valid) {
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool.commandPool,                   // commandPool
            commandPoolResetFlags));                  // flags
    }

    constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
        VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
    };
    const VkCommandBufferBeginInfo commandBufferBeginInfo {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
        nullptr,                                     // pNext
        commandBufferUsageFlags,                     // flags
        nullptr,                                     // pInheritanceInfo
    };
    if (valid) {
        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo
    }

    StateCache stateCache;

    const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
    const auto commandCount = static_cast<uint32_t>(rcRef.size());
    const RenderCommandBeginRenderPass* rcBeginRenderPass =
        (mrpcld.rpBeginCmdIndex < commandCount)
            ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
            : nullptr;
    const RenderCommandEndRenderPass* rcEndRenderPass =
        (mrpcld.rpEndCmdIndex < commandCount)
            ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
            : nullptr;

    if (rcBeginRenderPass && rcEndRenderPass) {
        if (mrpcld.rpBarrierCmdIndex < commandCount) {
            const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
            PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
            const RenderCommandBarrierPoint& barrierPoint =
                *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
            // handle all barriers before the render command that needs resource syncing
            RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
        }

        // begin render pass
        stateCache.primaryRenderPass = true;
        RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
        stateCache.primaryRenderPass = false;

        // get secondary command buffers from the correct indices and execute
        for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
            const uint32_t currCmdBufIdx = cmdBufIdx + idx;
            PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
            const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
            NodeContextPoolManagerVk& contextPoolVk =
                *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);

            const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
            const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
            const auto mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
            // next subpass is only called from the second render pass on
            if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
                RenderCommandBeginRenderPass renderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
                renderPass.renderPassDesc.subpassContents =
                    SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
                stateCache.renderCommandBeginRenderPass = nullptr; // reset
                RenderCommand(
                    renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
            }
            RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
        }

        // end render pass (replaces the primary render pass)
        stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
        // NOTE: the render graph has batched the subpasses to END_SUBPASS; we need END_RENDER_PASS
        constexpr RenderCommandEndRenderPass rcerp = {};
        RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
    }

    // end cmd buffer
    if (valid) {
        VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
    }

    commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
}

void RenderBackendVk::RenderExecuteSecondaryCommandLists(
    const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
{
    if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
        vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
            1u,                                       // commandBufferCount
            &executeCmdBuffer.commandBuffer);         // pCommandBuffers
    }
}

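// A secondary command buffer recorded with RENDER_PASS_CONTINUE_BIT must
// declare the render pass and subpass it executes within; this builds that
// VkCommandBufferInheritanceInfo from the list's BEGIN_RENDER_PASS command.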
VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
    const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
{
    auto& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
    const auto cmdCount = static_cast<uint32_t>(rcRef.size());

    const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
    PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
    PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
    if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
        const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
        PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
        const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
        LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);

        const uint32_t subpass = renderCmd.subpassStartIndex;
        return VkCommandBufferInheritanceInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
            nullptr,                                           // pNext
            lowLevelRenderPassData.renderPass,                 // renderPass
            subpass,                                           // subpass
            VK_NULL_HANDLE,                                    // framebuffer
            VK_FALSE,                                          // occlusionQueryEnable
            0,                                                 // queryFlags
            0,                                                 // pipelineStatistics
        };
    } else {
        return VkCommandBufferInheritanceInfo {};
    }
}

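// Records a single render command list into a Vulkan command buffer. For
// multi-render-command-list render passes only the first list begins and the
// last list ends the shared command buffer; secondary command lists record
// into their own secondary command buffer instead.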
void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
{
    // these are validated in the render graph
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
    const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
#endif
    PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
    PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
    if (perfDataSet) {
        perfDataSet->cpuTimer.Begin();
    }

    RENDER_CPU_PERF_SCOPE("RenderSingleCommandList", debugNames.renderCommandBufferName);
#endif

    contextPoolMgr.BeginBackendFrame();
    ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
    nodeContextPsoMgr.BeginBackendFrame();

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();

    StateCache stateCache = {}; // state cache for this render command list
    stateCache.backendNode = renderCommandCtx.renderBackendNode;
    stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;

    // the command buffer has been waited on with a single frame fence
    const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
    const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
    const bool endCommandBuffer =
        (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
    const ContextCommandPoolVk* ptrCmdPool = nullptr;
    if (mrclDesc.multiRenderNodeCmdList) {
        PLUGIN_ASSERT(mrclDesc.baseContext);
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
                          ->GetContextCommandPool();
    } else if (mrclDesc.secondaryCommandBuffer) {
        PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
    } else {
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
    }

    // update cmd list context descriptor sets
    UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);

    PLUGIN_ASSERT(ptrCmdPool);
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;

    if (beginCommandBuffer) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool->commandPool,                  // commandPool
            commandPoolResetFlags));                  // flags

        VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
        VkCommandBufferInheritanceInfo inheritanceInfo {};
        if (stateCache.secondaryCommandBuffer) {
            commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
            inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
        }
        const VkCommandBufferBeginInfo commandBufferBeginInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,                    // sType
            nullptr,                                                        // pNext
            commandBufferUsageFlags,                                        // flags
            mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
        };

        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
            PLUGIN_ASSERT(gpuQuery);

            gpuQuery->NextQueryIndex();

            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
        }
#endif
#endif
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    {
        BeginDebugMarker(cmdBuffer, debugNames.renderCommandListName, { 1.f, 1.f, 1.f, 1.f });
    }
#endif

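    // Dispatch each recorded command to its typed RenderCommand overload; the
    // state cache carries render pass and pipeline state across commands and
    // recording stops if the command list is invalidated.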
    for (const auto& ref : rcRef) {
        if (!stateCache.validCommandList) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
                "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
                debugNames.renderCommandListName.data());
#endif
            break;
        }

        PLUGIN_ASSERT(ref.rc);
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        {
            const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
            BeginDebugMarker(cmdBuffer, COMMAND_NAMES[index], { 0.87f, 0.83f, 0.29f, 1.f });
        }
#endif

        switch (ref.type) {
            case RenderCommandType::BARRIER_POINT: {
                if (!stateCache.secondaryCommandBuffer) {
                    const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
                    // handle all barriers before the render command that needs resource syncing
                    RenderCommand(
                        barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
                }
                break;
            }
            case RenderCommandType::DRAW: {
                RenderCommand(
                    *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH: {
                RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES: {
                RenderCommand(*static_cast<RenderCommandCopyAccelerationStructureInstances*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::CLEAR_COLOR_IMAGE: {
                RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1088                     contextPoolMgr, stateCache);
1089                 break;
1090             }
1091             case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
1092                 RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1093                     contextPoolMgr, stateCache);
1094                 break;
1095             }
1096             case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
1097                 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1098                     contextPoolMgr, stateCache);
1099                 break;
1100             }
1101             case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
1102                 RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
1103                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1104                 break;
1105             }
1106             case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
1107                 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1108                     contextPoolMgr, stateCache);
1109                 break;
1110             }
1111             case RenderCommandType::DYNAMIC_STATE_STENCIL: {
1112                 RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1113                     contextPoolMgr, stateCache);
1114                 break;
1115             }
1116             case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1117                 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1118                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1119                 break;
1120             }
1121             case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1122                 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1123                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1124                 break;
1125             }
1126             //
1127             case RenderCommandType::WRITE_TIMESTAMP: {
1128                 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1129                     contextPoolMgr, stateCache);
1130                 break;
1131             }
1132             case RenderCommandType::UNDEFINED:
1133             case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1134             case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1135             case RenderCommandType::BEGIN_DEBUG_MARKER:
1136 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1137                 RenderCommand(*static_cast<RenderCommandBeginDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1138                     contextPoolMgr, stateCache);
1139 #endif
1140                 break;
1141             case RenderCommandType::END_DEBUG_MARKER:
1142 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1143                 RenderCommand(*static_cast<RenderCommandEndDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1144                     contextPoolMgr, stateCache);
1145 #endif
1146                 break;
1147             default: {
1148                 PLUGIN_ASSERT(false && "invalid render command");
1149                 break;
1150             }
1151         }
1152 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1153         {
1154             EndDebugMarker(cmdBuffer);
1155         }
1156 #endif
1157     }
1158 
1159     if (!presentationData_.infos.empty()) {
1160         RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1161     }
1162 
1163 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1164     if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1165         deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1166     }
1167 #endif
1168 
1169 #if (RENDER_PERF_ENABLED == 1)
1170     // copy counters
1171     if (perfDataSet) {
1172         CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1173     }
1174 #endif
1175 
1176     if (endCommandBuffer) {
1177 #if (RENDER_PERF_ENABLED == 1)
1178         if (perfDataSet) {
1179             perfDataSet->cpuTimer.End();
1180         }
1181 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1182         if (validGpuQueries) {
1183             WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1184                 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1185         }
1186 #endif
1187         CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1188 #endif
1189 
1190         VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1191 
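        // only primary command buffers are submitted to the queue; a secondary
        // command buffer is executed from its primary, so its submit slot stays empty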
1192         if (mrclDesc.secondaryCommandBuffer) {
1193             commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1194         } else {
1195             commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1196         }
1197     }
1198 }
1199 
1200 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1201     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1202 {
1203     const RenderHandle psoHandle = renderCmd.psoHandle;
1204     const auto pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1205 
1206     stateCache.psoHandle = psoHandle;
1207 
1208     VkPipeline pipeline { VK_NULL_HANDLE };
1209     VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
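    // compute PSOs resolve directly from the handle; graphics PSOs additionally
    // depend on render pass compatibility (hashed below), since a VkPipeline may
    // only be bound inside a compatible VkRenderPass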
1210     if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1211         const auto* pso = static_cast<const ComputePipelineStateObjectVk*>(
1212             psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1213         if (pso) {
1214             const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1215             pipeline = plat.pipeline;
1216             pipelineLayout = plat.pipelineLayout;
1217         }
1218     } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1219         PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1220         if (stateCache.renderCommandBeginRenderPass) {
1221             uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1222             if (stateCache.pipelineDescSetHash != 0) {
1223                 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1224             }
1225             const auto* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1226                 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1227                     stateCache.renderCommandBeginRenderPass->subpasses,
1228                     stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1229                     &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1230             if (pso) {
1231                 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1232                 pipeline = plat.pipeline;
1233                 pipelineLayout = plat.pipelineLayout;
1234             }
1235         }
1236     }
1237 
1238     // NOTE: render front-end expects pso binding after begin render pass
1239     // in some situations the render pass might change and therefore the pipeline changes
1240     // in some situations the render pass is the same and the rebinding is not needed
1241     const bool newPipeline = (pipeline != stateCache.pipeline);
1242     const bool valid = (pipeline != VK_NULL_HANDLE);
1243     if (valid && newPipeline) {
1244         stateCache.pipeline = pipeline;
1245         stateCache.pipelineLayout = pipelineLayout;
1246         stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1247         vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1248             pipelineBindPoint,                  // pipelineBindPoint
1249             pipeline);                          // pipeline
1250 #if (RENDER_PERF_ENABLED == 1)
1251         stateCache.perfCounters.bindPipelineCount++;
1252 #endif
1253     }
1254 }
1255 
1256 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1257     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1258 {
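    // draws are skipped when the descriptor set bindings are invalid; a non-zero
    // indexCount selects an indexed draw, otherwise a non-indexed draw is recorded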
1259     if (stateCache.validBindings) {
1260         if (renderCmd.indexCount) {
1261             vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1262                 renderCmd.indexCount,              // indexCount
1263                 renderCmd.instanceCount,           // instanceCount
1264                 renderCmd.firstIndex,              // firstIndex
1265                 renderCmd.vertexOffset,            // vertexOffset
1266                 renderCmd.firstInstance);          // firstInstance
1267 #if (RENDER_PERF_ENABLED == 1)
1268             stateCache.perfCounters.drawCount++;
1269             stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1270             stateCache.perfCounters.triangleCount += (renderCmd.indexCount / 3u) // 3: indices per triangle (assumes triangle lists)
1271                                                      * renderCmd.instanceCount;
1271 #endif
1272         } else {
1273             vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1274                 renderCmd.vertexCount,      // vertexCount
1275                 renderCmd.instanceCount,    // instanceCount
1276                 renderCmd.firstVertex,      // firstVertex
1277                 renderCmd.firstInstance);   // firstInstance
1278 #if (RENDER_PERF_ENABLED == 1)
1279             stateCache.perfCounters.drawCount++;
1280             stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1281             stateCache.perfCounters.triangleCount += (renderCmd.vertexCount / 3u) // 3: vertices per triangle (assumes triangle lists)
1282                                                      * renderCmd.instanceCount;
1283 #endif
1284         }
1285     }
1286 }
1287 
1288 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1289     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1290 {
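    // the indirect argument offset includes the dynamic ring buffer offset
    // (currentByteOffset) of mapped buffers that advance per frame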
1291     if (stateCache.validBindings) {
1292         if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1293             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1294             const VkBuffer buffer = plat.buffer;
1295             const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1296             if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1297                 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1298                     buffer,                                    // buffer
1299                     offset,                                    // offset
1300                     renderCmd.drawCount,                       // drawCount
1301                     renderCmd.stride);                         // stride
1302             } else {
1303                 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1304                     buffer,                             // buffer
1305                     offset,                         // offset (includes currentByteOffset)
1306                     renderCmd.drawCount,                // drawCount
1307                     renderCmd.stride);                  // stride
1308             }
1309 #if (RENDER_PERF_ENABLED == 1)
1310             stateCache.perfCounters.drawIndirectCount++;
1311 #endif
1312         }
1313     }
1314 }
1315 
1316 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1317     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1318 {
1319     if (stateCache.validBindings) {
1320         vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1321             renderCmd.groupCountX,          // groupCountX
1322             renderCmd.groupCountY,          // groupCountY
1323             renderCmd.groupCountZ);         // groupCountZ
1324 #if (RENDER_PERF_ENABLED == 1)
1325         stateCache.perfCounters.dispatchCount++;
1326 #endif
1327     }
1328 }
1329 
1330 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1331     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1332     const StateCache& stateCache)
1333 {
1334     if (stateCache.validBindings) {
1335         if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1336             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1337             const VkBuffer buffer = plat.buffer;
1338             const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1339             vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1340                 buffer,                                 // buffer
1341                 offset);                                // offset
1342 #if (RENDER_PERF_ENABLED == 1)
1343             stateCache.perfCounters.dispatchIndirectCount++;
1344 #endif
1345         }
1346     }
1347 }
1348 
1349 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1350     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1351     StateCache& stateCache)
1352 {
1353     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1354     stateCache.renderCommandBeginRenderPass = &renderCmd;
1355 
1356     auto& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1357     // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
1358     stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1359 
1360     // early out for multi render command list render pass
1361     if (stateCache.secondaryCommandBuffer) {
1362         return; // early out
1363     }
1364     const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1365                             (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1366     // invalidate the whole command list
1367     if (!validRpFbo) {
1368         stateCache.validCommandList = false;
1369         return; // early out
1370     }
1371 
1372     if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
1373         if (renderCmd.subpassStartIndex < renderCmd.subpasses.size()) {
1374             if ((renderCmd.subpasses[renderCmd.subpassStartIndex].subpassFlags &
1375                     SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
1376                 const auto subpassContents = static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1377                 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1378                     subpassContents);                  // contents
1379             }
1380         }
1381         return; // early out
1382     }
1383 
1384     const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1385 
1386     VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1387     bool hasClearValues = false;
1388     for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1389         const auto& ref = renderPassDesc.attachments[idx];
1390         if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1391             ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1392             const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1393             VkClearValue cVal;
1394             if (RenderHandleUtil::IsDepthImage(handle)) {
1395                 PLUGIN_STATIC_ASSERT(sizeof(cVal.depthStencil) == sizeof(ref.clearValue.depthStencil));
1396                 cVal.depthStencil.depth = ref.clearValue.depthStencil.depth;
1397                 cVal.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1398             } else {
1399                 PLUGIN_STATIC_ASSERT(sizeof(cVal.color) == sizeof(ref.clearValue.color));
1400                 CloneData(&cVal.color, sizeof(cVal.color), &ref.clearValue.color, sizeof(ref.clearValue.color));
1401             }
1402             clearValues[idx] = cVal;
1403             hasClearValues = true;
1404         }
1405     }
1406 
1407     // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1408     // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1409     const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
1410 
1411     VkRect2D renderArea {
1412         { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1413         { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1414     };
1415     // render area needs to be inside frame buffer
1416     const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1417     renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1418     renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1419     renderArea.extent.width = Math::min(renderArea.extent.width,
1420         static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1421     renderArea.extent.height = Math::min(renderArea.extent.height,
1422         static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1423 
1424     const VkRenderPassBeginInfo renderPassBeginInfo {
1425         VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,      // sType
1426         nullptr,                                       // pNext
1427         stateCache.lowLevelRenderPassData.renderPass,  // renderPass
1428         stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1429         renderArea,                                    // renderArea
1430         clearValueCount,                               // clearValueCount
1431         clearValues,                                   // pClearValues
1432     };
1433 
1434     // NOTE: could be patched in render graph
1435     // const VkSubpassContents subpassContents = (VkSubpassContents)renderPassDesc.subpassContents;
1436     const VkSubpassContents subpassContents =
1437         stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
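    // a primary render pass whose contents are recorded on parallel secondary
    // command buffers must use VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
    // otherwise the commands are recorded inline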
1438     vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1439         &renderPassBeginInfo,                  // pRenderPassBegin
1440         subpassContents);                      // contents
1441 #if (RENDER_PERF_ENABLED == 1)
1442     stateCache.perfCounters.renderPassCount++;
1443 #endif
1444 }
1445 
1446 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1447     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1448 {
1449     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1450 
1451     const auto subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1452     vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1453         subpassContents);                  // contents
1454 }
1455 
1456 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1457     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1458 {
1459     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1460 
1461     // early out for multi render command list render pass
1462     if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
1463         return; // NOTE
1464     }
1465 
1466     stateCache.renderCommandBeginRenderPass = nullptr;
1467     stateCache.lowLevelRenderPassData = {};
1468 
1469     if (!stateCache.secondaryCommandBuffer) {
1470         vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1471     }
1472 }
1473 
1474 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1475     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1476     const StateCache& stateCache)
1477 {
1478     PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1479     PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1480 
1481     const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1482 
1483     VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1484     VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1485     const GpuBufferVk* gpuBuffer = nullptr;
1486     RenderHandle currBufferHandle;
1487     for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1488         const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
1489         // our importer usually uses the same GPU buffer for all vertex buffers in a single primitive
1490         // do not re-fetch the buffer if not needed
1491         if (currBufferHandle.id != currVb.bufferHandle.id) {
1492             currBufferHandle = currVb.bufferHandle;
1493             gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1494         }
1495         if (gpuBuffer) {
1496             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1497             const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1498             vertexBuffers[idx] = plat.buffer;
1499             offsets[idx] = offset;
1500         }
1501     }
1502 
1503     vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1504         0,                                       // firstBinding
1505         vertexBufferCount,                       // bindingCount
1506         vertexBuffers,                           // pBuffers
1507         offsets);                                // pOffsets
1508 }
1509 
1510 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1511     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1512     const StateCache& stateCache)
1513 {
1514     if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1515         gpuBuffer) {
1516         const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1517         const VkBuffer buffer = plat.buffer;
1518         const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1519         const auto indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1520 
1521         vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1522             buffer,                                // buffer
1523             offset,                                // offset
1524             indexType);                            // indexType
1525     }
1526 }
1527 
1528 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1529     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1530 {
1531     const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1532     const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1533     if (srcImagePtr && dstImagePtr) {
1534         const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
1535         const auto& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();
1536 
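        // GPU_IMAGE_ALL_LAYERS is resolved to the actual arrayLayers of each image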
1537         const ImageBlit& ib = renderCmd.imageBlit;
1538         const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1539                                            ? srcPlatImage.arrayLayers
1540                                            : ib.srcSubresource.layerCount;
1541         const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1542                                            ? dstPlatImage.arrayLayers
1543                                            : ib.dstSubresource.layerCount;
1544 
1545         const VkImageSubresourceLayers srcSubresourceLayers {
1546             (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1547             ib.srcSubresource.mipLevel,                             // mipLevel
1548             ib.srcSubresource.baseArrayLayer,                       // baseArrayLayer
1549             srcLayerCount,                                          // layerCount
1550         };
1551         const VkImageSubresourceLayers dstSubresourceLayers {
1552             (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1553             ib.dstSubresource.mipLevel,                             // mipLevel
1554             ib.dstSubresource.baseArrayLayer,                       // baseArrayLayer
1555             dstLayerCount,                                          // layerCount
1556         };
1557 
1558         const VkImageBlit imageBlit {
1559             srcSubresourceLayers, // srcSubresource
1560             { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1561                 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1562                     (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1563             dstSubresourceLayers,                        // dstSubresource
1564             { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1565                 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1566                     (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1567         };
1568 
1569         vkCmdBlitImage(cmdBuf.commandBuffer,         // commandBuffer
1570             srcPlatImage.image,                      // srcImage
1571             (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout,
1572             dstPlatImage.image,                      // dstImage
1573             (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1574             1,                                       // regionCount
1575             &imageBlit,                              // pRegions
1576             (VkFilter)renderCmd.filter);             // filter
1577     }
1578 }
1579 
1580 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1581     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1582 {
1583     const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1584     const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1585 
1586     if (srcGpuBuffer && dstGpuBuffer) {
1587         const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1588         const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1589         const VkBufferCopy bufferCopy {
1590             renderCmd.bufferCopy.srcOffset,
1591             renderCmd.bufferCopy.dstOffset,
1592             renderCmd.bufferCopy.size,
1593         };
1594 
1595         if (bufferCopy.size > 0) {
1596             vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1597                 srcBuffer,                        // srcBuffer
1598                 dstBuffer,                        // dstBuffer
1599                 1,                                // regionCount
1600                 &bufferCopy);                     // pRegions
1601         }
1602     }
1603 }
1604 
1605 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1606     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1607     const StateCache& stateCache)
1608 {
1609     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1610         PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1611         return;
1612     }
1613 
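    // copyType selects the transfer direction: the src/dst handles are interpreted
    // either as (buffer -> image) or as (image -> buffer)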
1614     const GpuBufferVk* gpuBuffer = nullptr;
1615     const GpuImageVk* gpuImage = nullptr;
1616     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1617         gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1618         gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1619     } else {
1620         gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1621         gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1622     }
1623 
1624     if (gpuBuffer && gpuImage) {
1625         const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1626         const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1627         const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1628         const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1629                                         ? platImage.arrayLayers
1630                                         : subresourceLayer.layerCount;
1631         const VkImageSubresourceLayers imageSubresourceLayer {
1632             (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1633             subresourceLayer.mipLevel,
1634             subresourceLayer.baseArrayLayer,
1635             layerCount,
1636         };
1637         const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1638         // Math::min to force staying inside image
1639         const uint32_t mip = subresourceLayer.mipLevel;
1640         const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
1641         const Size3D& imageOffset = bufferImageCopy.imageOffset;
1642         const VkExtent3D imageExtent = {
1643             Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1644             Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1645             Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1646         };
1647         const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1648                            (imageOffset.depth < imageSize.depth);
1649         const VkBufferImageCopy bufferImageCopyVk {
1650             bufferImageCopy.bufferOffset,
1651             bufferImageCopy.bufferRowLength,
1652             bufferImageCopy.bufferImageHeight,
1653             imageSubresourceLayer,
1654             { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1655                 static_cast<int32_t>(imageOffset.depth) },
1656             imageExtent,
1657         };
1658 
1659         const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1660         const VkImage image = (gpuImage->GetPlatformData()).image;
1661 
1662         if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1663             vkCmdCopyBufferToImage(cmdBuf.commandBuffer,             // commandBuffer
1664                 buffer,                                              // srcBuffer
1665                 image,                                               // dstImage
1666                 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1667                 1,                                                   // regionCount
1668                 &bufferImageCopyVk);                                 // pRegions
1669         } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1670             vkCmdCopyImageToBuffer(cmdBuf.commandBuffer,             // commandBuffer
1671                 image,                                               // srcImage
1672                 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1673                 buffer,                                              // dstBuffer
1674                 1,                                                   // regionCount
1675                 &bufferImageCopyVk);                                 // pRegions
1676         }
1677     }
1678 }
1679 
1680 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1681     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1682 {
1683     const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1684     const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1685     if (srcGpuImage && dstGpuImage) {
1686         const ImageCopy& copy = renderCmd.imageCopy;
1687         const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1688         const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1689 
1690         const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1691         const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1692         const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1693                                            ? srcPlatImage.arrayLayers
1694                                            : srcSubresourceLayer.layerCount;
1695         const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1696                                            ? dstPlatImage.arrayLayers
1697                                            : dstSubresourceLayer.layerCount;
1698 
1699         const VkImageSubresourceLayers srcImageSubresourceLayer {
1700             (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1701             srcSubresourceLayer.mipLevel,
1702             srcSubresourceLayer.baseArrayLayer,
1703             srcLayerCount,
1704         };
1705         const VkImageSubresourceLayers dstImageSubresourceLayer {
1706             (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1707             dstSubresourceLayer.mipLevel,
1708             dstSubresourceLayer.baseArrayLayer,
1709             dstLayerCount,
1710         };
1711 
1712         const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1713         const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1714 
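        // clamp the copy extent so it stays inside both the source and the destination image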
1715         VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1716         ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1717         ext.height =
1718             Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1719         ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1720 
1721         const VkImageCopy imageCopyVk {
1722             srcImageSubresourceLayer,                                 // srcSubresource
1723             { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1724             dstImageSubresourceLayer,                                 // dstSubresource
1725             { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1726             ext,                                                      // extent
1727         };
1728         vkCmdCopyImage(cmdBuf.commandBuffer,                     // commandBuffer
1729             srcPlatImage.image,                                  // srcImage
1730             VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1731             dstPlatImage.image,                                  // dstImage
1732             VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1733             1,                                                   // regionCount
1734             &imageCopyVk);                                       // pRegions
1735     }
1736 }
1737 
1738 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1739     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1740     const RenderBarrierList& rbl)
1741 {
1742     if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1743         return;
1744     }
1745 
1746     const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1747         rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1748     PLUGIN_ASSERT(barrierPointBarriers);
1749     if (!barrierPointBarriers) {
1750         return;
1751     }
1752     constexpr uint32_t maxBarrierCount { 8 };
1753     VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1754     VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1755     VkMemoryBarrier memoryBarriers[maxBarrierCount];
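    // barriers are accumulated into the fixed-size arrays above and flushed with a
    // single vkCmdPipelineBarrier whenever an array fills up or the list ends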
1756 
1757     // generally there is only a single barrier list per barrier point
1758     // in situations with batched render passes there can be many
1759     // NOTE: all barrier lists could be patched to a single vk command if needed
1760     // NOTE: memory and pipeline barriers should be allowed on the front-end side
1761     const auto barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
1762     const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1763 #if (RENDER_VALIDATION_ENABLED == 1)
1764     uint32_t fullBarrierCount = 0u;
1765 #endif
1766     for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1767         if (nextBarrierList == nullptr) { // cannot be null, just a safety
1768             PLUGIN_ASSERT(false);
1769             return;
1770         }
1771         const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1772         nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1773         const auto barrierCount = (uint32_t)barrierListRef.count;
1774 
1775         uint32_t bufferBarrierIdx = 0;
1776         uint32_t imageBarrierIdx = 0;
1777         uint32_t memoryBarrierIdx = 0;
1778 
1779         VkPipelineStageFlags srcPipelineStageMask { 0 };
1780         VkPipelineStageFlags dstPipelineStageMask { 0 };
1781         constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1782 
1783         for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1784             const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1785 
1786             uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1787             uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1788             if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1789                 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1790                 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1791             }
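            // differing queue types indicate a queue family ownership transfer;
            // otherwise VK_QUEUE_FAMILY_IGNORED leaves ownership untouched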
1792 
1793             const RenderHandle resourceHandle = ref.resourceHandle;
1794             const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1795 
1796             PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1797                           (handleType == RenderHandleType::GPU_IMAGE));
1798 
1799             const auto srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1800             const auto dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1801 
1802             srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1803             dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1804 
1805             // NOTE: zero size buffer barriers allowed ATM
1806             if (handleType == RenderHandleType::GPU_BUFFER) {
1807                 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1808                     const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1809                     // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1810                     const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1811                     const VkDeviceSize size =
1812                         Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1813                             (VkDeviceSize)ref.dst.optionalByteSize);
1814                     if (platBuffer.buffer) {
1815                         bufferMemoryBarriers[bufferBarrierIdx++] = {
1816                             VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1817                             nullptr,                                 // pNext
1818                             srcAccessMask,                           // srcAccessMask
1819                             dstAccessMask,                           // dstAccessMask
1820                             srcQueueFamilyIndex,                     // srcQueueFamilyIndex
1821                             dstQueueFamilyIndex,                     // dstQueueFamilyIndex
1822                             platBuffer.buffer,                       // buffer
1823                             offset,                                  // offset
1824                             size,                                    // size
1825                         };
1826                     }
1827                 }
1828             } else if (handleType == RenderHandleType::GPU_IMAGE) {
1829                 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1830                     const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1831 
1832                     const auto srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1833                     const auto dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1834 
1835                     const VkImageAspectFlags imageAspectFlags =
1836                         (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1837                             ? platImage.aspectFlags
1838                             : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1839 
1840                     const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1841                                                     PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1842                                                     ? VK_REMAINING_MIP_LEVELS
1843                                                     : ref.src.optionalImageSubresourceRange.levelCount;
1844 
1845                     const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1846                                                     PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1847                                                     ? VK_REMAINING_ARRAY_LAYERS
1848                                                     : ref.src.optionalImageSubresourceRange.layerCount;
1849 
1850                     const VkImageSubresourceRange imageSubresourceRange {
1851                         imageAspectFlags,                                     // aspectMask
1852                         ref.src.optionalImageSubresourceRange.baseMipLevel,   // baseMipLevel
1853                         levelCount,                                           // levelCount
1854                         ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1855                         layerCount,                                           // layerCount
1856                     };
1857 
1858                     if (platImage.image) {
1859                         imageMemoryBarriers[imageBarrierIdx++] = {
1860                             VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1861                             nullptr,                                // pNext
1862                             srcAccessMask,                          // srcAccessMask
1863                             dstAccessMask,                          // dstAccessMask
1864                             srcImageLayout,                         // oldLayout
1865                             dstImageLayout,                         // newLayout
1866                             srcQueueFamilyIndex,                    // srcQueueFamilyIndex
1867                             dstQueueFamilyIndex,                    // dstQueueFamilyIndex
1868                             platImage.image,                        // image
1869                             imageSubresourceRange,                  // subresourceRange
1870                         };
1871                     }
1872                 }
1873             } else {
1874                 memoryBarriers[memoryBarrierIdx++] = {
1875                     VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1876                     nullptr,                          // pNext
1877                     srcAccessMask,                    // srcAccessMask
1878                     dstAccessMask,                    // dstAccessMask
1879                 };
1880             }
1881 
1882             const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1883             const bool resetBarriers = ((bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1884                                         (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1)));
1885 
1886             if (hasBarriers && resetBarriers) {
1887 #if (RENDER_VALIDATION_ENABLED == 1)
1888                 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1889 #endif
1890                 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1891                     srcPipelineStageMask,                  // srcStageMask
1892                     dstPipelineStageMask,                  // dstStageMask
1893                     dependencyFlags,                       // dependencyFlags
1894                     memoryBarrierIdx,                      // memoryBarrierCount
1895                     memoryBarriers,                        // pMemoryBarriers
1896                     bufferBarrierIdx,                      // bufferMemoryBarrierCount
1897                     bufferMemoryBarriers,                  // pBufferMemoryBarriers
1898                     imageBarrierIdx,                       // imageMemoryBarrierCount
1899                     imageMemoryBarriers);                  // pImageMemoryBarriers
1900 
1901                 bufferBarrierIdx = 0;
1902                 imageBarrierIdx = 0;
1903                 memoryBarrierIdx = 0;
1904             }
1905         }
1906     }
1907 #if (RENDER_VALIDATION_ENABLED == 1)
1908     if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1909         PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1910             "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1911             barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1912     }
1913 #endif
1914 }
1915 
1916 namespace {
1917 struct DescriptorSetUpdateDataStruct {
1918     uint32_t accelIndex { 0U };
1919     uint32_t bufferIndex { 0U };
1920     uint32_t imageIndex { 0U };
1921     uint32_t samplerIndex { 0U };
1922     uint32_t writeBindIdx { 0U };
1923 };
1924 
1925 void UpdateSingleDescriptorSet(const GpuResourceManager& gpuResourceMgr, RenderBackendVk::StateCache* stateCache,
1926     const LowLevelDescriptorSetVk* descriptorSet, const DescriptorSetLayoutBindingResourcesHandler& bindingResources,
1927     LowLevelContextDescriptorWriteDataVk& wd, DescriptorSetUpdateDataStruct& dsud)
1928 {
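    // fills the pre-allocated write data (wd) and VkWriteDescriptorSet entries;
    // the indices in dsud track write positions for buffers, images, and samplers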
1929     // actual vulkan descriptor set update
1930     if (descriptorSet && descriptorSet->descriptorSet) {
1931         if ((uint32_t)bindingResources.bindings.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1932             PLUGIN_ASSERT(false);
1933             return;
1934         }
1935         const auto& buffers = bindingResources.buffers;
1936         const auto& images = bindingResources.images;
1937         const auto& samplers = bindingResources.samplers;
1938         for (const auto& refBuf : buffers) {
1939             const auto& ref = refBuf.desc;
1940             const uint32_t descriptorCount = ref.binding.descriptorCount;
1941             // skip array bindings which are bound from the first index; they also have a descriptorCount of 0
1942             if (descriptorCount == 0) {
1943                 continue;
1944             }
1945             const uint32_t arrayOffset = ref.arrayOffset;
1946             PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1947             if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1948 #if (RENDER_VULKAN_RT_ENABLED == 1)
1949                 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1950                     // the first element is the ref; from index 1 onward the array offsets are used
1951                     const BindableBuffer& bRes =
1952                         (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
1953                     if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1954                         const GpuAccelerationStructurePlatformDataVk& platAccel =
1955                             resPtr->GetPlatformDataAccelerationStructure();
1956                         wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1957                             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1958                             nullptr,                                                           // pNext
1959                             descriptorCount,                  // accelerationStructureCount
1960                             &platAccel.accelerationStructure, // pAccelerationStructures
1961                         };
1962                     }
1963                 }
1964                 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1965                     VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
1966                     &wd.descriptorAccelInfos[dsud.accelIndex],    // pNext
1967                     descriptorSet->descriptorSet,                 // dstSet
1968                     ref.binding.binding,                          // dstBinding
1969                     0,                                            // dstArrayElement
1970                     descriptorCount,                              // descriptorCount
1971                     (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1972                     nullptr,                                      // pImageInfo
1973                     nullptr,                                      // pBufferInfo
1974                     nullptr,                                      // pTexelBufferView
1975                 };
1976                 dsud.accelIndex += descriptorCount;
1977 #endif
1978             } else {
1979                 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1980                 // the first element is the ref; from index 1 onward the array offsets are used
1981                     const BindableBuffer& bRes =
1982                         (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
1983                     const auto optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1984                     if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1985                         const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
1986                         // takes into account dynamic ring buffers with mapping
1987                         const auto bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1988                         const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1989                         const VkDeviceSize bufferRange =
1990                             Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1991                                 (VkDeviceSize)bRes.byteSize);
1992                         wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
1993                             platBuffer.buffer, // buffer
1994                             byteOffset,        // offset
1995                             bufferRange,       // range
1996                         };
1997                     }
1998                 }
1999                 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2000                     VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
2001                     nullptr,                                      // pNext
2002                     descriptorSet->descriptorSet,                 // dstSet
2003                     ref.binding.binding,                          // dstBinding
2004                     0,                                            // dstArrayElement
2005                     descriptorCount,                              // descriptorCount
2006                     (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2007                     nullptr,                                      // pImageInfo
2008                     &wd.descriptorBufferInfos[dsud.bufferIndex],  // pBufferInfo
2009                     nullptr,                                      // pTexelBufferView
2010                 };
2011                 dsud.bufferIndex += descriptorCount;
2012             }
2013         }
2014         for (const auto& refImg : images) {
2015             const auto& ref = refImg.desc;
2016             const uint32_t descriptorCount = ref.binding.descriptorCount;
2017             // skip array bindings that are bound from the first index; they also have descriptorCount 0
2018             if (descriptorCount == 0) {
2019                 continue;
2020             }
2021             const auto descriptorType = (VkDescriptorType)ref.binding.descriptorType;
2022             const uint32_t arrayOffset = ref.arrayOffset;
2023             PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
2024             for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2025                 // the first element is the ref; from index 1 onward the array offsets are used
2026                 const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].desc.resource;
2027                 if (const GpuImageVk* resPtr = gpuResourceMgr.GetImage<GpuImageVk>(bRes.handle); resPtr) {
2028                     VkSampler sampler = VK_NULL_HANDLE;
2029                     if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2030                         const GpuSamplerVk* samplerPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
2031                         if (samplerPtr) {
2032                             sampler = samplerPtr->GetPlatformData().sampler;
2033                         }
2034                     }
2035                     const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
2036                     const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
2037                     VkImageView imageView = platImage.imageView;
2038                     if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2039                         (bRes.layer < platImageViews.layerImageViews.size())) {
2040                         imageView = platImageViews.layerImageViews[bRes.layer];
2041                     } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
2042                         if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2043                             (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
2044                             imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
2045                         } else if (bRes.mip < platImageViews.mipImageViews.size()) {
2046                             imageView = platImageViews.mipImageViews[bRes.mip];
2047                         }
2048                     }
2049                     wd.descriptorImageInfos[dsud.imageIndex + idx] = {
2050                         sampler,                         // sampler
2051                         imageView,                       // imageView
2052                         (VkImageLayout)bRes.imageLayout, // imageLayout
2053                     };
2054                 }
2055             }
2056             wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2057                 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,    // sType
2058                 nullptr,                                   // pNext
2059                 descriptorSet->descriptorSet,              // dstSet
2060                 ref.binding.binding,                       // dstBinding
2061                 0,                                         // dstArrayElement
2062                 descriptorCount,                           // descriptorCount
2063                 descriptorType,                            // descriptorType
2064                 &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
2065                 nullptr,                                   // pBufferInfo
2066                 nullptr,                                   // pTexelBufferView
2067             };
2068             dsud.imageIndex += descriptorCount;
2069         }
2070         for (const auto& refSam : samplers) {
2071             const auto& ref = refSam.desc;
2072             const uint32_t descriptorCount = ref.binding.descriptorCount;
2073             // skip array bindings that are bound from the first index; they also have descriptorCount 0
2074             if (descriptorCount == 0) {
2075                 continue;
2076             }
2077             const uint32_t arrayOffset = ref.arrayOffset;
2078             PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
2079             for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2080                 // the first element is the ref; from index 1 onward the array offsets are used
2081                 const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].desc.resource;
2082                 if (const GpuSamplerVk* resPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
2083                     const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
2084                     wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
2085                         platSampler.sampler,      // sampler
2086                         VK_NULL_HANDLE,           // imageView
2087                         VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
2088                     };
2089                 }
2090             }
2091             wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2092                 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,        // sType
2093                 nullptr,                                       // pNext
2094                 descriptorSet->descriptorSet,                  // dstSet
2095                 ref.binding.binding,                           // dstBinding
2096                 0,                                             // dstArrayElement
2097                 descriptorCount,                               // descriptorCount
2098                 (VkDescriptorType)ref.binding.descriptorType,  // descriptorType
2099                 &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
2100                 nullptr,                                       // pBufferInfo
2101                 nullptr,                                       // pTexelBufferView
2102             };
2103             dsud.samplerIndex += descriptorCount;
2104         }
2105 #if (RENDER_PERF_ENABLED == 1)
2106         // count the actually updated descriptor sets, not the API calls
2107         if (stateCache) {
2108             stateCache->perfCounters.updateDescriptorSetCount++;
2109         }
2110 #endif
2111     }
2112 }
2113 } // namespace
2114 
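// Updates the GPU data of all global descriptor sets that are marked dirty for this frame.
// The individual writes are batched into the shared write data and flushed once at the end.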
2115 void RenderBackendVk::UpdateGlobalDescriptorSets()
2116 {
2117     RENDER_CPU_PERF_SCOPE("UpdateGlobalDescriptorSets", "");
2118 
2119     auto& dsMgr = (DescriptorSetManagerVk&)device_.GetDescriptorSetManager();
2120     LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
2121     const auto& allDescSets = dsMgr.GetUpdateDescriptorSetHandles();
2122     const uint32_t upDescriptorSetCount =
2123         static_cast<uint32_t>(Math::min(allDescSets.size(), wd.writeDescriptorSets.size()));
2124     DescriptorSetUpdateDataStruct dsud;
2125 
2126     for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
2127         if (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET) {
2128             continue;
2129         }
2130         const RenderHandle descHandle = allDescSets[descIdx];
2131         // first update gpu descriptor indices
2132         if (!dsMgr.UpdateDescriptorSetGpuHandle(descHandle)) {
2133             continue; // continue if not dirty
2134         }
2135 
2136         const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
2137         const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);
2138 
2139         UpdateSingleDescriptorSet(gpuResourceMgr_, nullptr, descriptorSet, bindingResources, wd, dsud);
2140 
2141         // NOTE: should update perf counters
2142     }
2143 
2144     // flush the batched descriptor writes with a single API call if anything was updated
2145     if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
2146         const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2147         vkUpdateDescriptorSets(device,     // device
2148             dsud.writeBindIdx,             // descriptorWriteCount
2149             wd.writeDescriptorSets.data(), // pDescriptorWrites
2150             0,                             // descriptorCopyCount
2151             nullptr);                      // pDescriptorCopies
2152     }
2153 }
2154 
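// Same flow as UpdateGlobalDescriptorSets(), but for the descriptor sets recorded on a single
// render command list; the state cache is passed so per-node perf counters can be updated.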
2155 void RenderBackendVk::UpdateCommandListDescriptorSets(
2156     const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
2157 {
2158     auto& dsMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
2159 
2160     const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
2161     const auto upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
2162     LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
2163     DescriptorSetUpdateDataStruct dsud;
2164     for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
2165         if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
2166             (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
2167             continue;
2168         }
2169 
2170         const RenderHandle descHandle = allDescSets[descIdx];
2171         // first update gpu descriptor indices
2172         if (!dsMgr.UpdateDescriptorSetGpuHandle(descHandle)) {
2173             continue; // continue if not dirty
2174         }
2175 
2176         const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
2177         const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);
2178 
2179         UpdateSingleDescriptorSet(gpuResourceMgr_, &stateCache, descriptorSet, bindingResources, wd, dsud);
2180     }
2181     // flush the batched descriptor writes with a single API call if anything was updated
2182     if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
2183         const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2184         vkUpdateDescriptorSets(device,     // device
2185             dsud.writeBindIdx,             // descriptorWriteCount
2186             wd.writeDescriptorSets.data(), // pDescriptorWrites
2187             0,                             // descriptorCopyCount
2188             nullptr);                      // pDescriptorCopies
2189     }
2190 }
2191 
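// Binds the descriptor sets for the currently bound PSO and collects the dynamic offsets into
// a single flattened array. If the immutable-sampler hash stored in the state cache changes,
// the PSO is re-created and both the pipeline bind and this command are re-issued.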
2192 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
2193     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2194     StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
2195 {
2196     const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
2197 
2198     PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2199     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
2200     const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
2201                                                       ? VK_PIPELINE_BIND_POINT_COMPUTE
2202                                                       : VK_PIPELINE_BIND_POINT_GRAPHICS;
2203     const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2204 
2205     bool valid = (pipelineLayout != VK_NULL_HANDLE);
2206     const uint32_t firstSet = renderCmd.firstSet;
2207     const uint32_t setCount = renderCmd.setCount;
2208     if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
2209         uint32_t dynamicOffsetDescriptorSetIndices = 0;
2210         uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
2211 
2212         VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2213         const uint32_t firstPlusCount = firstSet + setCount;
2214         for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2215             const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2216             if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
2217                 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
2218                 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
2219 
2220                 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
2221                 if (descriptorSet && descriptorSet->descriptorSet) {
2222                     descriptorSets[idx] = descriptorSet->descriptorSet;
2223                     // update, copy to state cache
2224                     PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
2225                     stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
2226                     const uint32_t currShift = (idx * 16u);
2227                     const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
2228                     uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
2229                     stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
2230                 } else {
2231                     valid = false;
2232                 }
2233             }
2234         }
2235 
2236         uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
2237                                 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2238         uint32_t dynamicOffsetIdx = 0;
2239         // NOTE: optimize
2240         // this code has some safety checks that the offset is not updated for non-dynamic sets
2241         // it could be left on only for validation
2242         for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2243             if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
2244                 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2245                 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
2246                 const auto dodResCount = static_cast<uint32_t>(dod.resources.size());
2247                 const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
2248                 for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
2249                     uint32_t byteOffset = 0U;
2250                     if (descriptorSetDynamicOffsets.dynamicOffsets &&
2251                         (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount)) {
2252                         byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
2253                     }
2254                     dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
2255                 }
2256             }
2257         }
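        // NOTE: the offsets are packed in set order to match Vulkan's flattened pDynamicOffsets;
        // e.g. if sets 0 and 2 each use one dynamic offset, dynamicOffsets holds
        // { offsetForSet0, offsetForSet2 } and dynamicOffsetIdx == 2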
2258 
2259         stateCache.validBindings = valid;
2260         if (stateCache.validBindings) {
2261             if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
2262                 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
2263                     pipelineBindPoint,                        // pipelineBindPoint
2264                     pipelineLayout,                           // layout
2265                     firstSet,                                 // firstSet
2266                     setCount,                                 // descriptorSetCount
2267                     &descriptorSets[firstSet],                // pDescriptorSets
2268                     dynamicOffsetIdx,                         // dynamicOffsetCount
2269                     dynamicOffsets);                          // pDynamicOffsets
2270 #if (RENDER_PERF_ENABLED == 1)
2271                 stateCache.perfCounters.bindDescriptorSetCount++;
2272 #endif
2273             } else {
2274                 // possible pso re-creation and bind of these sets to the new pso
2275                 PLUGIN_LOG_E("vkCmdBindDescriptorSets: descriptor set hash mismatch, re-creating pso");
2276                 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
2277                     (PipelineBindPoint)pipelineBindPoint };
2278                 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
2279                 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
2280             }
2281         } else {
2282             PLUGIN_LOG_E("stateCache.validBindings invalid");
2283         }
2284     }
2285 }
2286 
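// Pushes inline constants through the pipeline layout; the offset is always 0 and the size
// and stage flags come from the push constant data recorded with the command.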
2287 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2288     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2289 {
2290     PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
2291     PLUGIN_ASSERT(renderCmd.data);
2292 
2293     PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2294     const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2295 
2296     const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0));
2297     PLUGIN_ASSERT(valid);
2298 
2299     if (valid) {
2300         const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
2301         vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
2302             pipelineLayout,                      // layout
2303             shaderStageFlags,                    // stageFlags
2304             0,                                   // offset
2305             renderCmd.pushConstant.byteSize,     // size
2306             static_cast<void*>(renderCmd.data)); // pValues
2307     }
2308 }
2309 
2310 namespace {
2311 struct DeviceAddressOffset {
2312     uint64_t address;
2313     uint64_t offset;
2314 };
2315 
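// Accumulates address validity: once any queried device address is zero, 'valid' stays false
// so the caller can skip the recorded command and log a validation warning.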
2316 inline constexpr VkDeviceOrHostAddressConstKHR GetValidDeviceAddress(const DeviceAddressOffset& bo, bool& valid)
2317 {
2318     valid = valid && (bo.address != 0);
2319     return { bo.address + bo.offset };
2320 }
2321 } // namespace
2322 
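// Translates the recorded triangle/AABB/instance geometry views into
// VkAccelerationStructureGeometryKHR entries plus matching build ranges, and records a single
// vkCmdBuildAccelerationStructuresKHR() call targeting the destination acceleration structure.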
2323 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
2324     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2325     const StateCache& stateCache)
2326 {
2327 #if (RENDER_VULKAN_RT_ENABLED == 1)
2328     const AsBuildGeometryData& geometry = renderCmd.geometry;
2329 
2330     const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(geometry.dstAccelerationStructure);
2331     const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(geometry.scratchBuffer.handle);
2332     if ((!dst) || (!scratchBuffer)) {
2333         return; // early out
2334     }
2335 
2336     const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
2337     const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
2338 
2339     bool validAddresses = true;
2340 
2341     const size_t arraySize =
2342         renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
2343     vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
2344     vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
2345 
2346     uint32_t arrayIndex = 0;
2347     const bool isTopLevel =
2348         (geometry.info.type == AccelerationStructureType::CORE_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL);
2349     for (const auto& ref : renderCmd.trianglesView) {
2350         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2351             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2352             nullptr,                                               // pNext
2353             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
2354             {},                                                    // geometry
2355             VkGeometryFlagsKHR(ref.info.geometryFlags),            // flags
2356         };
2357         uint32_t primitiveCount = 0;
2358         const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.vertexData.handle);
2359         const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.indexData.handle);
2360         if (vb && ib) {
2361             const VkDeviceOrHostAddressConstKHR vertexAddress =
2362                 GetValidDeviceAddress({ vb->GetPlatformData().deviceAddress, ref.vertexData.offset }, validAddresses);
2363             const VkDeviceOrHostAddressConstKHR indexAddress =
2364                 GetValidDeviceAddress({ ib->GetPlatformData().deviceAddress, ref.indexData.offset }, validAddresses);
2365 
2366             VkDeviceOrHostAddressConstKHR transformAddress {};
2367             if (RenderHandleUtil::IsValid(ref.transformData.handle)) {
2368                 if (const GpuBufferVk* tr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.transformData.handle);
2369                     tr) {
2370                     transformAddress = GetValidDeviceAddress(
2371                         { tr->GetPlatformData().deviceAddress, ref.transformData.offset }, validAddresses);
2372                 }
2373             }
2374             primitiveCount = ref.info.indexCount / 3u; // triangles
2375 
2376             geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
2377                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
2378                 nullptr,                                                              // pNext
2379                 VkFormat(ref.info.vertexFormat),                                      // vertexFormat
2380                 vertexAddress,                                                        // vertexData
2381                 VkDeviceSize(ref.info.vertexStride),                                  // vertexStride
2382                 ref.info.maxVertex,                                                   // maxVertex
2383                 VkIndexType(ref.info.indexType),                                      // indexType
2384                 indexAddress,                                                         // indexData
2385                 transformAddress,                                                     // transformData
2386             };
2387         }
2388         buildRangeInfos[arrayIndex] = {
2389             primitiveCount, // primitiveCount
2390             0u,             // primitiveOffset
2391             0u,             // firstVertex
2392             0u,             // transformOffset
2393         };
2394         arrayIndex++;
2395     }
2396     for (const auto& ref : renderCmd.aabbsView) {
2397         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2398             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2399             nullptr,                                               // pNext
2400             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
2401             {},                                                    // geometry
2402             VkGeometryFlagsKHR(ref.info.geometryFlags),            // flags
2403         };
2404         VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2405         if (const GpuBufferVk* ptr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.data.handle); ptr) {
2406             deviceAddress =
2407                 GetValidDeviceAddress({ ptr->GetPlatformData().deviceAddress, ref.data.offset }, validAddresses);
2408         }
2409         geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
2410             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
2411             nullptr,                                                          // pNext
2412             deviceAddress,                                                    // data
2413             ref.info.stride,                                                  // stride
2414         };
2415 
2416         buildRangeInfos[arrayIndex] = {
2417             1u, // primitiveCount
2418             0u, // primitiveOffset
2419             0u, // firstVertex
2420             0u, // transformOffset
2421         };
2422         arrayIndex++;
2423     }
2424     for (const auto& ref : renderCmd.instancesView) {
2425         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2426             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2427             nullptr,                                               // pNext
2428             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
2429             {},                                                    // geometry
2430             VkGeometryFlagsKHR(ref.info.geometryFlags),            // flags
2431         };
2432         VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2433         if (const GpuBufferVk* ptr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.data.handle); ptr) {
2435             deviceAddress =
2436                 GetValidDeviceAddress({ ptr->GetPlatformData().deviceAddress, ref.data.offset }, validAddresses);
2437         }
2438         geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2439             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2440             nullptr,                                                              // pNext
2441             ref.info.arrayOfPointers,                                             // arrayOfPointers
2442             deviceAddress,                                                        // data
2443         };
2444         buildRangeInfos[arrayIndex] = {
2445             ref.info.primitiveCount, // primitiveCount
2446             0u,                      // primitiveOffset
2447             0u,                      // firstVertex
2448             0u,                      // transformOffset
2449         };
2450         arrayIndex++;
2451     }
2452 
2453     const VkDeviceOrHostAddressKHR scratchData = { GetValidDeviceAddress(
2454         { scratchBuffer->GetPlatformData().deviceAddress, geometry.scratchBuffer.offset }, validAddresses)
2455                                                        .deviceAddress };
2456 
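    // a top-level build consumes exactly one instance geometry; bottom-level builds use all
    // collected triangle/AABB geometries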
2457     const uint32_t geometryCount = isTopLevel ? 1U : arrayIndex;
2458     const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2459         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2460         nullptr,                                                          // pNext
2461         VkAccelerationStructureTypeKHR(geometry.info.type),               // type
2462         VkBuildAccelerationStructureFlagsKHR(geometry.info.flags),        // flags
2463         VkBuildAccelerationStructureModeKHR(geometry.info.mode),          // mode
2464         VK_NULL_HANDLE,                                                   // srcAccelerationStructure
2465         dstAs,                                                            // dstAccelerationStructure
2466         geometryCount,                                                    // geometryCount
2467         geometryData.data(),                                              // pGeometries
2468         nullptr,                                                          // ppGeometries
2469         scratchData,                                                      // scratchData
2470     };
2471 
2472     vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2473     for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2474         buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2475     }
2476     const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2477     if (validAddresses && extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2478         extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2479             1U,                                                                // infoCount
2480             &buildGeometryInfo,                                                // pInfos
2481             buildRangeInfosPtr.data());                                        // ppBuildRangeInfos
2482     }
2483 #if (RENDER_VALIDATION_ENABLED == 1)
2484     if (!validAddresses) {
2485         const string tmpStr = "RenderBackendVk::RenderCommandBuildAccelerationStructure_address";
2486         PLUGIN_LOG_ONCE_W(
2487             tmpStr, "RENDER_VALIDATION: Invalid device addresses in RenderCommandBuildAccelerationStructure");
2488     }
2489 #endif
2490 #endif
2491 }
2492 
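// CPU-side copy: maps the destination buffer and writes one VkAccelerationStructureInstanceKHR
// per instance, converting the engine's column-major 4x3 transforms into Vulkan's row-major
// 3x4 layout on the way.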
2493 void RenderBackendVk::RenderCommand(const RenderCommandCopyAccelerationStructureInstances& renderCmd,
2494     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2495     const StateCache& stateCache)
2496 {
2497 #if (RENDER_VULKAN_RT_ENABLED == 1)
2498     // NOTE: at the moment only the instance data copy is handled here, without extra barriers
2499     const RenderHandle dstHandle = renderCmd.destination.handle;
2500     const GpuBufferDesc dstBufferDesc = gpuResourceMgr_.GetBufferDescriptor(dstHandle);
2501     if (uint8_t* dstDataBegin = static_cast<uint8_t*>(gpuResourceMgr_.MapBuffer(dstHandle)); dstDataBegin) {
2502         const uint8_t* dstDataEnd = dstDataBegin + dstBufferDesc.byteSize;
2503         // add render command offset
2504         dstDataBegin += size_t(renderCmd.destination.offset);
2505         // loop and copy all instances
2506         bool validAddresses = true;
2507         for (uint32_t idx = 0; idx < renderCmd.instancesView.size(); ++idx) {
2508             const auto& ref = renderCmd.instancesView[idx];
2509             uint64_t accelDeviceAddress = 0;
2510             if (const GpuBufferVk* ptr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(ref.accelerationStructure); ptr) {
2511                 accelDeviceAddress = GetValidDeviceAddress(
2512                     { ptr->GetPlatformDataAccelerationStructure().deviceAddress, 0 }, validAddresses)
2513                                          .deviceAddress;
2514             }
2515             const auto& tr = ref.transform;
2516             // convert the column-major 4x3 transform to a row-major 3x4 layout
2517             VkAccelerationStructureInstanceKHR instance {
2518                 { { { tr[0].x, tr[1].x, tr[2].x, tr[3].x }, { tr[0].y, tr[1].y, tr[2].y, tr[3].y },
2519                     { tr[0].z, tr[1].z, tr[2].z, tr[3].z } } }, // transform
2520                 ref.instanceCustomIndex,                        // instanceCustomIndex : 24
2521                 ref.mask,                                       // mask : 8
2522                 0U,                                             // instanceShaderBindingTableRecordOffset : 24
2523                 VkGeometryInstanceFlagsKHR(ref.flags),          // flags : 8
2524                 accelDeviceAddress,                             // accelerationStructureReference
2525             };
2526             constexpr size_t byteSize = sizeof(VkAccelerationStructureInstanceKHR);
2527             uint8_t* dstData = dstDataBegin + byteSize * idx;
2528             CloneData(dstData, size_t(dstDataEnd - dstData), &instance, byteSize);
2529         }
2530         gpuResourceMgr_.UnmapBuffer(dstHandle);
2531 
2532 #if (RENDER_VALIDATION_ENABLED == 1)
2533         if (!validAddresses) {
2534             const string tmpStr = "RenderBackendVk::RenderCommandCopyAccelerationStructureInstances_address";
2535             PLUGIN_LOG_ONCE_W(tmpStr,
2536                 "RENDER_VALIDATION: Invalid device addresses in RenderCommandCopyAccelerationStructureInstances");
2537         }
2538 #endif
2539     }
2540 #endif
2541 }
2542 
2543 void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
2544     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2545     const StateCache& stateCache)
2546 {
2547     const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
2548     // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
2549     const auto imageLayout = (VkImageLayout)renderCmd.imageLayout;
2550     PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
2551     if (imagePtr) {
2552         const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
2553         if (platImage.image) {
2554             VkClearColorValue clearColor;
2555             PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
2556             CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));
2557 
2558             // NOTE: a temporary vector is allocated because there is no fixed upper limit for the range count
2559             vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
2560             for (size_t idx = 0; idx < ranges.size(); ++idx) {
2561                 const auto& inputRef = renderCmd.ranges[idx];
2562                 ranges[idx] = {
2563                     (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
2564                     inputRef.baseMipLevel,                         // baseMipLevel
2565                     inputRef.levelCount,                           // levelCount
2566                     inputRef.baseArrayLayer,                       // baseArrayLayer
2567                     inputRef.layerCount,                           // layerCount
2568                 };
2569             }
2570 
2571             vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
2572                 platImage.image,                       // image
2573                 imageLayout,                           // imageLayout
2574                 &clearColor,                           // pColor
2575                 static_cast<uint32_t>(ranges.size()),  // rangeCount
2576                 ranges.data());                        // pRanges
2577         }
2578     }
2579 }
2580 
2581 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
2582     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2583     const StateCache& stateCache)
2584 {
2585     const ViewportDesc& vd = renderCmd.viewportDesc;
2586 
2587     VkViewport vp {
2588         vd.x,        // x
2589         vd.y,        // y
2590         vd.width,    // width
2591         vd.height,   // height
2592         vd.minDepth, // minDepth
2593         vd.maxDepth, // maxDepth
2594     };
2595     // handle viewport for surface transform
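    // (pre-rotation: when the swapchain preTransform is a 90/180/270 degree rotation, the
    // viewport rectangle is remapped into the rotated framebuffer coordinate space)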
2596     const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
2597     if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
2598         if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) ==
2599             CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
2600             vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.height - vd.y;
2601             vp.y = vd.x;
2602             vp.width = vd.height;
2603             vp.height = vd.width;
2604         } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) ==
2605                    CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
2606             vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.width - vd.x;
2607             vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.height - vd.y;
2608         } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) ==
2609                    CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
2610             vp.x = vd.y;
2611             vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.width - vd.x;
2612             vp.width = vd.height;
2613             vp.height = vd.width;
2614         }
2615     }
2616 
2617     vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
2618         0,                                 // firstViewport
2619         1,                                 // viewportCount
2620         &vp);                              // pViewports
2621 }
2622 
2623 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
2624     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2625     const StateCache& stateCache)
2626 {
2627     const ScissorDesc& sd = renderCmd.scissorDesc;
2628 
2629     VkRect2D sc {
2630         { sd.offsetX, sd.offsetY },          // offset
2631         { sd.extentWidth, sd.extentHeight }, // extent
2632     };
2633     // handle scissor for surface transform
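    // (same pre-rotation remapping as for the viewport above)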
2634     const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
2635     if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
2636         if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) ==
2637             CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
2638             sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.height - sc.offset.y, sc.offset.x },
2639                 { sc.extent.height, sc.extent.width } };
2640         } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) ==
2641                    CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
2642             sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.width - sc.offset.x,
2643                        (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.height - sc.offset.y },
2644                 { sc.extent.width, sc.extent.height } };
2645         } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) ==
2646                    CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
2647             sc = { { sc.offset.y, (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.width - sc.offset.x },
2648                 { sc.extent.height, sc.extent.width } };
2649         }
2650     }
2651 
2652     vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
2653         0,                                // firstScissor
2654         1,                                // scissorCount
2655         &sc);                             // pScissors
2656 }
2657 
2658 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
2659     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2660     const StateCache& stateCache)
2661 {
2662     vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
2663         renderCmd.lineWidth);               // lineWidth
2664 }
2665 
2666 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
2667     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2668     const StateCache& stateCache)
2669 {
2670     vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
2671         renderCmd.depthBiasConstantFactor,  // depthBiasConstantFactor
2672         renderCmd.depthBiasClamp,           // depthBiasClamp
2673         renderCmd.depthBiasSlopeFactor);    // depthBiasSlopeFactor
2674 }
2675 
2676 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
2677     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2678     const StateCache& stateCache)
2679 {
2680     vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
2681         renderCmd.blendConstants);               // blendConstants[4]
2682 }
2683 
2684 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
2685     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2686     const StateCache& stateCache)
2687 {
2688     vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
2689         renderCmd.minDepthBounds,             // minDepthBounds
2690         renderCmd.maxDepthBounds);            // maxDepthBounds
2691 }
2692 
2693 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
2694     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2695     const StateCache& stateCache)
2696 {
2697     const auto stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;
2698 
2699     if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2700         vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
2701             stencilFaceMask,                             // faceMask
2702             renderCmd.mask);                             // compareMask
2703     } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2704         vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
2705             stencilFaceMask,                           // faceMask
2706             renderCmd.mask);                           // writeMask
2707     } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2708         vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
2709             stencilFaceMask,                           // faceMask
2710             renderCmd.mask);                           // reference
2711     }
2712 }
2713 
2714 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
2715     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2716     const StateCache& stateCache)
2717 {
2718 #if (RENDER_VULKAN_FSR_ENABLED == 1)
2719     const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2720     if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
2721         const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
2722         const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
2723             (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
2724             (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
2725         };
2726 
2727         extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
2728             &fragmentSize,                                                // pFragmentSize
2729             combinerOps);                                                 // combinerOps
2730     }
2731 #endif
2732 }
2733 
2734 void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
2735     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2736     const StateCache& stateCache)
2737 {
2738     if (renderCmd.command) {
2739         const RenderBackendRecordingStateVk recordingState = {
2740             {},
2741             cmdBuf.commandBuffer,                              // commandBuffer
2742             stateCache.lowLevelRenderPassData.renderPass,      // renderPass
2743             stateCache.lowLevelRenderPassData.framebuffer,     // framebuffer
2744             stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2745             stateCache.lowLevelRenderPassData.subpassIndex,    // subpassIndex
2746             stateCache.pipelineLayout,                         // pipelineLayout
2747         };
2748         const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2749         renderCmd.command->ExecuteBackendCommand(lowLevelDevice, recordingState);
2750     } else if (stateCache.backendNode) {
2751         // legacy support for backend render nodes
2752         const RenderBackendRecordingStateVk recordingState = {
2753             {},
2754             cmdBuf.commandBuffer,                              // commandBuffer
2755             stateCache.lowLevelRenderPassData.renderPass,      // renderPass
2756             stateCache.lowLevelRenderPassData.framebuffer,     // framebuffer
2757             stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2758             stateCache.lowLevelRenderPassData.subpassIndex,    // subpassIndex
2759             stateCache.pipelineLayout,                         // pipelineLayout
2760         };
2761         const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2762         stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
2763     }
2764 }
2765 
2766 void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2767     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2768 {
2769     PLUGIN_ASSERT_MSG(false, "not implemented");
2770 
2771     const auto pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
2772     const uint32_t queryIndex = renderCmd.queryIndex;
2773     VkQueryPool queryPool = VK_NULL_HANDLE;
2774 
2775     vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2776         queryPool,                            // queryPool
2777         queryIndex,                           // firstQuery
2778         1);                                   // queryCount
2779 
2780     vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
2781         pipelineStageFlagBits,                // pipelineStage
2782         queryPool,                            // queryPool
2783         queryIndex);                          // query
2784 }
2785 
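// Transitions the swapchain images used by this command buffer to
// VK_IMAGE_LAYOUT_PRESENT_SRC_KHR with an image memory barrier before presentation.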
2786 void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
2787 {
2788     for (auto& presRef : presentationData_.infos) {
2789         if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
2790             continue;
2791         }
2792 
2793         PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
2794         PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
2795 
2796         const GpuResourceState& state = presRef.renderGraphProcessedState;
2797         const auto srcAccessMask = (VkAccessFlags)state.accessFlags;
2798         const auto dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
2799         const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
2800                                                   (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
2801         const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
2802         const auto oldLayout = (VkImageLayout)presRef.imageLayout;
2803         const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2804         // NOTE: queue is not currently checked (should be in the same queue as last time used)
2805         constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2806         constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2807         constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
2808         constexpr VkImageSubresourceRange imageSubresourceRange {
2809             VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
2810             0,                                                // baseMipLevel
2811             1,                                                // levelCount
2812             0,                                                // baseArrayLayer
2813             1,                                                // layerCount
2814         };
2815 
2816         const VkImageMemoryBarrier imageMemoryBarrier {
2817             VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
2818             nullptr,                                // pNext
2819             srcAccessMask,                          // srcAccessMask
2820             dstAccessMask,                          // dstAccessMask
2821             oldLayout,                              // oldLayout
2822             newLayout,                              // newLayout
2823             srcQueueFamilyIndex,                    // srcQueueFamilyIndex
2824             dstQueueFamilyIndex,                    // dstQueueFamilyIndex
2825             presRef.swapchainImage,                 // image
2826             imageSubresourceRange,                  // subresourceRange
2827         };
2828 
2829         vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
2830             srcStageMask,                          // srcStageMask
2831             dstStageMask,                          // dstStageMask
2832             dependencyFlags,                       // dependencyFlags
2833             0,                                     // memoryBarrierCount
2834             nullptr,                               // pMemoryBarriers
2835             0,                                     // bufferMemoryBarrierCount
2836             nullptr,                               // pBufferMemoryBarriers
2837             1,                                     // imageMemoryBarrierCount
2838             &imageMemoryBarrier);                  // pImageMemoryBarriers
2839 
2840         presRef.presentationLayoutChangeNeeded = false;
2841         presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
2842     }
2843 }
2844 
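// Debug labels recorded via VK_EXT_debug_utils; they appear as named, colored regions in
// tools such as RenderDoc. The function pointers may be null when the extension (or a layer
// providing it) is not enabled, so every call is guarded.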
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
void RenderBackendVk::BeginDebugMarker(
    const LowLevelCommandBufferVk& cmdBuf, const BASE_NS::string_view name, const Math::Vec4 color)
{
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
        // NOTE: pLabelName must be null-terminated; the names recorded into the command
        // list are assumed to satisfy this.
        const VkDebugUtilsLabelEXT label {
            VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
            nullptr,                                 // pNext
            name.data(),                             // pLabelName
            { color.x, color.y, color.z, color.w }   // color[4]
        };
        deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuf.commandBuffer, &label);
    }
}

void RenderBackendVk::EndDebugMarker(const LowLevelCommandBufferVk& cmdBuf)
{
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
        deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuf.commandBuffer);
    }
}
#endif

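// RenderCommandList playback wrappers for the debug marker helpers above.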
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
void RenderBackendVk::RenderCommand(const RenderCommandBeginDebugMarker& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    BeginDebugMarker(cmdBuf, renderCmd.name, renderCmd.color);
}

void RenderBackendVk::RenderCommand(const RenderCommandEndDebugMarker& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    EndDebugMarker(cmdBuf);
}
#endif

#if (RENDER_PERF_ENABLED == 1)

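// Lazily creates a CPU timer and (optionally) a GPU timestamp query per render node context,
// keyed by debug name. Each query slot holds two 64-bit timestamps (begin/end), and
// currentOffset advances frame by frame through the readback buffer so a slot is read back
// only after the GPU has had time to write it.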
void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
            constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
            perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
    perfGpuTimerData_.currentOffset =
        (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
#endif
}

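// Publishes the per-frame CPU timers and the accumulated GPU time, then aggregates and resets
// the per-node perf counters collected during command list processing.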
void RenderBackendVk::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in microseconds
    fullGpuTime = perfGpuTimerData_.fullGpuCounter;
    perfGpuTimerData_.fullGpuCounter = 0;

    perfGpuTimerData_.gpuBuffer->Unmap();
#endif
    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        // defensive: guard the manager pointer before use
        if (IPerformanceDataManager* perfData = globalPerfData->Get("RENDER"); perfData) {
            perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
            perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
            perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
            perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
            perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
            perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
        }

        CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
        CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
        CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
        CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
        CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
        CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
    }
    // aggregate the per-timer counters into combined totals for tracing
    PerfCounters counters;
    for (auto& timer : timers_) {
        CopyPerfCounters(timer.second.perfCounters, counters);
        timer.second.perfCounters = {}; // reset perf counters
    }

    CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(counters.drawCount));
    CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(counters.drawIndirectCount));
    CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(counters.dispatchCount));
    CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(counters.dispatchIndirectCount));
    CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(counters.renderPassCount));
    CORE_PROFILER_PLOT("Bind pipeline count", static_cast<int64_t>(counters.bindPipelineCount));
    CORE_PROFILER_PLOT("Bind descriptor set count", static_cast<int64_t>(counters.bindDescriptorSetCount));
    CORE_PROFILER_PLOT("Update descriptor set count", static_cast<int64_t>(counters.updateDescriptorSetCount));
    CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(counters.instanceCount));
    CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(counters.triangleCount));
}

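// Resets one query slot and writes a timestamp into it. vkCmdResetQueryPool must be recorded
// outside of a render pass instance, which is why this is skipped when recording into a
// secondary command buffer (i.e. inside a render pass).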
void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
    const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (stateCache.secondaryCommandBuffer) {
        return; // cannot be recorded inside a render pass (e.g. with secondary command buffers)
    }
    PLUGIN_ASSERT(timers_.count(name) == 1);
    const PerfDataSet* perfDataSet = &timers_[name];
    if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
        const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
        if (platData.queryPool) {
            vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
                platData.queryPool,                   // queryPool
                queryIndex,                           // firstQuery
                1);                                   // queryCount

            vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
                stageFlagBits,                        // pipelineStage
                platData.queryPool,                   // queryPool
                queryIndex);                          // query
        }
    }
#endif
}

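// For reference, the timestamp query pools fetched through gpuQueryMgr_ above are plain
// Vulkan query pools; a minimal creation sketch (illustrative only, the engine's actual
// setup lives in CreateGpuQueryVk) would be:
//
//     const VkQueryPoolCreateInfo createInfo {
//         VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // sType
//         nullptr,                                  // pNext
//         0,                                        // flags
//         VK_QUERY_TYPE_TIMESTAMP,                  // queryType
//         2u,                                       // queryCount (begin/end)
//         0,                                        // pipelineStatistics (unused)
//     };
//     VkQueryPool queryPool = VK_NULL_HANDLE;
//     vkCreateQueryPool(device, &createInfo, nullptr, &queryPool);
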
namespace {
void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
{
    perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
    perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount,
        CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
}
} // namespace

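// Reads back the GPU timestamps an earlier frame stored in this timer's slot, converts them
// to microseconds using the device's timestampPeriod, and records a vkCmdCopyQueryPoolResults
// for the current frame's queries into the same ring buffer slot.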
void RenderBackendVk::CopyPerfTimeStamp(
    const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
{
    PLUGIN_ASSERT(timers_.count(name) == 1);
    PerfDataSet* const perfDataSet = &timers_[name];

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // read the results an earlier frame left in the mapped buffer, and further below record
    // a copy of this frame's query results into the same GPU buffer
    const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
    int64_t gpuMicroSeconds = 0;
    {
        auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
        auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);

        const uint64_t startStamp = *currentData;
        const uint64_t endStamp = *(currentData + 1);

        // timestampPeriod is the number of nanoseconds per timestamp tick
        const double timestampPeriod =
            static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                                    .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
        constexpr int64_t nanosToMicrosDivisor { 1000 };
        gpuMicroSeconds =
            static_cast<int64_t>(static_cast<double>(endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
        // discard clearly invalid values (e.g. slots the GPU has not written yet)
        constexpr int64_t maxValidMicroSecondValue { 4294967295 };
        if (gpuMicroSeconds > maxValidMicroSecondValue) {
            gpuMicroSeconds = 0;
        }
        perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();

    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        // defensive: guard the manager pointer before use
        if (IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode"); perfData) {
            perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);

            // vkCmdCopyQueryPoolResults cannot be recorded inside a render pass
            // (e.g. with secondary command buffers)
            if (!stateCache.secondaryCommandBuffer) {
                if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
                    const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());

                    const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
                    PLUGIN_ASSERT(gpuBuffer);
                    const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();

                    constexpr uint32_t queryCount = 2;
                    constexpr VkDeviceSize queryStride = sizeof(uint64_t);
                    constexpr VkQueryResultFlags queryResultFlags =
                        VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;

                    if (platData.queryPool) {
                        vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
                            platData.queryPool,                         // queryPool
                            0,                                          // firstQuery
                            queryCount,                                 // queryCount
                            platBuffer.buffer,                          // dstBuffer
                            currentFrameByteOffset,                     // dstOffset
                            queryStride,                                // stride
                            queryResultFlags);                          // flags
                    }
                }
            }
#endif
            UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
        }
    }
}

#endif
RENDER_END_NAMESPACE()