/*
 * Copyright (C) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_vk.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vulkan/vulkan.h>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/containers/string_view.h>
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#include <core/plugin/intf_class_register.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/device/pipeline_state_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_backend_node.h>
#include <render/vulkan/intf_device_vk.h>

#if (RENDER_PERF_ENABLED == 1)
#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#include "vulkan/gpu_query_vk.h"
#endif

#include "device/gpu_acceleration_structure.h"
#include "device/gpu_buffer.h"
#include "device/gpu_image.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "device/gpu_sampler.h"
#include "device/pipeline_state_object.h"
#include "device/render_frame_sync.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pool_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "render_backend.h"
#include "render_graph.h"
#include "util/log.h"
#include "vulkan/gpu_acceleration_structure_vk.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

using CORE_NS::GetInstance;
using CORE_NS::IParallelTaskQueue;
using CORE_NS::IPerformanceDataManager;
using CORE_NS::IPerformanceDataManagerFactory;
using CORE_NS::ITaskQueueFactory;
using CORE_NS::IThreadPool;
using CORE_NS::UID_TASK_QUEUE_FACTORY;

RENDER_BEGIN_NAMESPACE()
namespace {
#if (RENDER_VULKAN_RT_ENABLED == 1)
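// Fetches the GPU device address of a buffer (used as input for ray tracing /
// acceleration structure builds). Requires the bufferDeviceAddress feature
// (Vulkan 1.2 / VK_KHR_buffer_device_address) to be enabled on the device.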
inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
{
    const VkBufferDeviceAddressInfo addressInfo {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
        nullptr,                                      // pNext
        buffer,                                       // buffer
    };
    return vkGetBufferDeviceAddress(device, &addressInfo);
}
#endif
} // namespace

// Helper class for running std::function as a ThreadPool task.
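// Used by RenderProcessCommandLists below:
//   queue_->Submit(index, FunctionTask::Create([...]() { /* record command lists */ }));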
class FunctionTask final : public IThreadPool::ITask {
public:
    static Ptr Create(std::function<void()> func)
    {
        return Ptr { new FunctionTask(func) };
    }

    explicit FunctionTask(std::function<void()> func) : func_(func) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    std::function<void()> func_;
};

#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
namespace {
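// Debug label names for command buffer regions; indexed with the numeric value
// of RenderCommandType, so the order must match the enum declaration.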
constexpr const string_view COMMAND_NAMES[] = { "Undefined", "Draw", "DrawIndirect", "Dispatch", "DispatchIndirect",
    "BindPipeline", "BeginRenderPass", "NextSubpass", "EndRenderPass", "BindVertexBuffers", "BindIndexBuffer",
    "CopyBuffer", "CopyBufferImage", "BlitImage", "BarrierPoint", "UpdateDescriptorSets", "BindDescriptorSets",
    "PushConstant", "DynamicStateViewport", "DynamicStateScissor", "DynamicStateLineWidth", "DynamicStateDepthBias",
    "DynamicStateBlendConstants", "DynamicStateDepthBounds", "DynamicStateStencil", "ExecuteBackendFramePosition",
    "WriteTimestamp", "GpuQueueTransferRelease", "GpuQueueTransferAcquire" };
} // namespace
#endif

#if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
namespace {
static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
}
#endif

RenderBackendVk::RenderBackendVk(
    Device& dev, GpuResourceManager& gpuResourceManager, const CORE_NS::IParallelTaskQueue::Ptr& queue)
    : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
      queue_(queue.get())
{
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();

    constexpr uint32_t maxQueryObjectCount { 512u };
    constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
    const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
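    // 512 query objects * 2 timestamps * sizeof(uint64_t) = 8 KiB per frame,
    // replicated for every buffered frame (GetCommandBufferingCount())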
    const GpuBufferDesc desc {
        BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT,                        // usageFlags
        CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
        0,                                                                              // engineCreationFlags
        fullByteSize,                                                                   // byteSize
    };
    perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
    perfGpuTimerData_.currentOffset = 0;
    perfGpuTimerData_.frameByteSize = byteSize;
    perfGpuTimerData_.fullByteSize = fullByteSize;
    { // zero initialize
        uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
        memset_s(bufferData, fullByteSize, 0, fullByteSize);
        perfGpuTimerData_.gpuBuffer->Unmap();
    }
#endif
#endif
}

void RenderBackendVk::AcquirePresentationInfo(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    presentationInfo_ = {};
    if ((backBufferConfig.config.backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) &&
        device_.HasSwapchain()) {
        presentationInfo_.useSwapchain = true;
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;

        const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain());
        if (swapchain) {
            const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
            const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
            presentationInfo_.swapchainSemaphore = platSwapchain.swapchainImages.semaphore;

            const VkResult result = vkAcquireNextImageKHR(device, // device
                vkSwapchain,                                      // swapchain
                UINT64_MAX,                                       // timeout
                presentationInfo_.swapchainSemaphore,             // semaphore
                (VkFence) nullptr,                                // fence
                &presentationInfo_.swapchainImageIndex);          // pImageIndex

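            // NOTE: VK_SUBOPTIMAL_KHR still provides a usable image, so it is
            // treated as a successful acquire here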
            switch (result) {
                // Success
                case VK_SUCCESS:
                case VK_TIMEOUT:
                case VK_NOT_READY:
                case VK_SUBOPTIMAL_KHR:
                    presentationInfo_.validAcquire = true;
                    break;

                // Failure
                case VK_ERROR_OUT_OF_HOST_MEMORY:
                case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
                    return;
                case VK_ERROR_DEVICE_LOST:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
                    return;
                case VK_ERROR_OUT_OF_DATE_KHR:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
                    return;
                case VK_ERROR_SURFACE_LOST_KHR:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
                    return;

                case VK_EVENT_SET:
                case VK_EVENT_RESET:
                case VK_INCOMPLETE:
                case VK_ERROR_INITIALIZATION_FAILED:
                case VK_ERROR_MEMORY_MAP_FAILED:
                case VK_ERROR_LAYER_NOT_PRESENT:
                case VK_ERROR_EXTENSION_NOT_PRESENT:
                case VK_ERROR_FEATURE_NOT_PRESENT:
                case VK_ERROR_INCOMPATIBLE_DRIVER:
                case VK_ERROR_TOO_MANY_OBJECTS:
                case VK_ERROR_FORMAT_NOT_SUPPORTED:
                case VK_ERROR_FRAGMENTED_POOL:
                case VK_ERROR_OUT_OF_POOL_MEMORY:
                case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                case VK_ERROR_VALIDATION_FAILED_EXT:
                case VK_ERROR_INVALID_SHADER_NV:
                // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
                case VK_ERROR_FRAGMENTATION_EXT:
                case VK_ERROR_NOT_PERMITTED_EXT:
                // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
                case VK_RESULT_MAX_ENUM:
                default:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR unknown error. Device invalidated");
                    PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
                    device_.SetDeviceStatus(false);
                    break;
            }

            PLUGIN_ASSERT(
                presentationInfo_.swapchainImageIndex < (uint32_t)platSwapchain.swapchainImages.images.size());

            // remap image to backbuffer
            const RenderHandle backBufferHandle =
                gpuResourceMgr_.GetImageRawHandle(backBufferConfig.config.backBufferName);
            const RenderHandle currentSwapchainHandle = gpuResourceMgr_.GetImageRawHandle(
                "CORE_DEFAULT_SWAPCHAIN_" + to_string(presentationInfo_.swapchainImageIndex));
            // special swapchain remapping
            gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(backBufferHandle, currentSwapchainHandle);
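            // after the remap, all uses of the back buffer handle resolve to the
            // swapchain image acquired for this frame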
            presentationInfo_.renderGraphProcessedState = backBufferConfig.backBufferState;
            presentationInfo_.imageLayout = backBufferConfig.layout;
            if (presentationInfo_.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR) {
                presentationInfo_.presentationLayoutChangeNeeded = true;
                presentationInfo_.renderNodeCommandListIndex =
                    static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);

                const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(backBufferHandle);
                PLUGIN_ASSERT(swapImage);
                presentationInfo_.swapchainImage = swapImage->GetPlatformData().image;
            }
        }
    }

#if (RENDER_VALIDATION_ENABLED == 1)
    if ((backBufferConfig.config.backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) &&
        (!device_.HasSwapchain())) {
        PLUGIN_LOG_E("RENDER_VALIDATION: trying to present without swapchain");
    }
#endif
}

void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (presentationInfo_.useSwapchain && backBufferConfig.config.present && presentationInfo_.validAcquire) {
        PLUGIN_ASSERT(!presentationInfo_.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presentationInfo_.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR);
#if (RENDER_PERF_ENABLED == 1)
        commonCpuTimers_.present.Begin();
#endif
        const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain());
        if (swapchain) {
            const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
            const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;

            PLUGIN_ASSERT(
                presentationInfo_.swapchainImageIndex < (uint32_t)platSwapchain.swapchainImages.images.size());

            // NOTE: currently waits for the last valid submission semaphore (backtraces here for valid semaphore)
            VkSemaphore waitSemaphore = VK_NULL_HANDLE;
            uint32_t waitSemaphoreCount = 0;
            if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
                waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
                waitSemaphoreCount = 1;
            }

            const VkPresentInfoKHR presentInfo {
                VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,     // sType
                nullptr,                                // pNext
                waitSemaphoreCount,                     // waitSemaphoreCount
                &waitSemaphore,                         // pWaitSemaphores
                1,                                      // swapchainCount
                &vkSwapchain,                           // pSwapchains
                &presentationInfo_.swapchainImageIndex, // pImageIndices
                nullptr                                 // pResults
            };

            const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
            const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
                &presentInfo);                                             // pPresentInfo

            switch (result) {
                // Success
                case VK_SUCCESS:
                    break;
                case VK_SUBOPTIMAL_KHR:
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
#endif
                    break;

                // Failure
                case VK_ERROR_OUT_OF_HOST_MEMORY:
                case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                    PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
                    return;
                case VK_ERROR_DEVICE_LOST:
                    PLUGIN_LOG_E("vkQueuePresentKHR device lost");
                    return;
                case VK_ERROR_OUT_OF_DATE_KHR:
                    PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
                    return;
                case VK_ERROR_SURFACE_LOST_KHR:
                    PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
                    return;

                case VK_NOT_READY:
                case VK_TIMEOUT:
                case VK_EVENT_SET:
                case VK_EVENT_RESET:
                case VK_INCOMPLETE:
                case VK_ERROR_INITIALIZATION_FAILED:
                case VK_ERROR_MEMORY_MAP_FAILED:
                case VK_ERROR_LAYER_NOT_PRESENT:
                case VK_ERROR_EXTENSION_NOT_PRESENT:
                case VK_ERROR_FEATURE_NOT_PRESENT:
                case VK_ERROR_INCOMPATIBLE_DRIVER:
                case VK_ERROR_TOO_MANY_OBJECTS:
                case VK_ERROR_FORMAT_NOT_SUPPORTED:
                case VK_ERROR_FRAGMENTED_POOL:
                case VK_ERROR_OUT_OF_POOL_MEMORY:
                case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                case VK_ERROR_VALIDATION_FAILED_EXT:
                case VK_ERROR_INVALID_SHADER_NV:
                case VK_ERROR_FRAGMENTATION_EXT:
                case VK_ERROR_NOT_PERMITTED_EXT:
                case VK_RESULT_MAX_ENUM:
                default:
                    PLUGIN_LOG_E("vkQueuePresentKHR unknown error");
                    PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
                    break;
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        commonCpuTimers_.present.End();
#endif
    }
}

void RenderBackendVk::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif

    commandBufferSubmitter_ = {};
    commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());

    AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
    if (presentationInfo_.useSwapchain && (!presentationInfo_.validAcquire)) {
        return;
    }

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif

    // command list process loop/execute
    RenderProcessCommandLists(renderCommandFrameData);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
    commonCpuTimers_.submit.Begin();
#endif

    PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
    // submit vulkan command buffers
    RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.submit.End();
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

void RenderBackendVk::RenderProcessSubmitCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: currently backtraces to final valid command buffer semaphore
    uint32_t finalCommandBufferSubmissionIndex = ~0u;
    commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
    for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
         --cmdBufferIdx) {
        if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
            (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
                VK_NULL_HANDLE)) {
            finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
            break;
        }
    }
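    // finalCommandBufferSubmissionIndex now selects the last command buffer that
    // actually gets submitted; that submission signals the frame fence and the
    // presentation wait semaphore below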

    for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
        const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
        if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
            continue;
        }

        const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];

        uint32_t waitSemaphoreCount = 0u;
        VkSemaphore waitSemaphores[PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + 1]; // + 1 for swapchain
        VkPipelineStageFlags
            waitSemaphorePipelineStageFlags[PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + 1];
        for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
            const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
            PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());

            VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
            if (waitSemaphore != VK_NULL_HANDLE) {
                waitSemaphores[waitSemaphoreCount] = waitSemaphore;
                waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                waitSemaphoreCount++;
            }
        }

        if ((renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
            (presentationInfo_.swapchainSemaphore != VK_NULL_HANDLE)) {
            waitSemaphores[waitSemaphoreCount] = presentationInfo_.swapchainSemaphore;
            waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
            waitSemaphoreCount++;
        }
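        // the acquire semaphore is waited at the color attachment output stage,
        // so earlier pipeline stages may run before the image is available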

        uint32_t signalSemaphoreCount = 0u;
        VkSemaphore semaphores[] = { VK_NULL_HANDLE, VK_NULL_HANDLE };
        VkFence fence = VK_NULL_HANDLE;
        if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
            // add fence signaling to last submission for frame sync
            if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
                fence = frameSync->GetFrameFence().fence;
                frameSync->FrameFenceIsSignalled();
            }

            if (presentationInfo_.useSwapchain && backBufferConfig.config.present) {
                commandBufferSubmitter_.presentationWaitSemaphore =
                    commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
                semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
            }
            if (backBufferConfig.config.gpuSemaphoreHandle != 0) {
                semaphores[signalSemaphoreCount++] =
                    VulkanHandleCast<VkSemaphore>(backBufferConfig.config.gpuSemaphoreHandle);
            }
        } else if (renderContextRef.submitDepencies.signalSemaphore) {
            semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
        }
        PLUGIN_ASSERT(signalSemaphoreCount <= 2); // 2: no more than 2 semaphores

        const VkSubmitInfo submitInfo {
            VK_STRUCTURE_TYPE_SUBMIT_INFO,   // sType
            nullptr,                         // pNext
            waitSemaphoreCount,              // waitSemaphoreCount
            waitSemaphores,                  // pWaitSemaphores
            waitSemaphorePipelineStageFlags, // pWaitDstStageMask
            1,                               // commandBufferCount
            &cmdSubmitterRef.commandBuffer,  // pCommandBuffers
            signalSemaphoreCount,            // signalSemaphoreCount
            semaphores,                      // pSignalSemaphores
        };

        const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
        VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
            1,                                  // submitCount
            &submitInfo,                        // pSubmits
            fence));                            // fence
    }
}

void RenderBackendVk::RenderProcessCommandLists(RenderCommandFrameData& renderCommandFrameData)
{
    if (queue_) {
        for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < (uint32_t)renderCommandFrameData.renderCommandContexts.size();) {
            // NOTE: the index is advanced at the end of the loop body
            // NOTE: dependent multi render command list render passes are currently not multi-threaded
            const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
            PLUGIN_ASSERT(ref.multiRenderCommandListCount > 0);
            const uint32_t rcCount = ref.multiRenderCommandListCount;
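            // a multi render command list render pass (rcCount > 1) is recorded
            // as a single task so its lists share one command buffer and one
            // recording thread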
            queue_->Submit(cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                MultiRenderCommandListDesc mrcDesc;
                mrcDesc.multiRenderCommandListCount = rcCount;
                mrcDesc.baseContext =
                    (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;

                for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                    const uint32_t currIdx = cmdBufferIdx + rcIdx;
                    mrcDesc.multiRenderCommandListIndex = rcIdx;
                    RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                    const DebugNames debugNames { ref2.debugName,
                        renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                    RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
                }
            }));

            cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
        }

        // Execute and wait for completion.
        queue_->Execute();
        queue_->Clear();
    } else {
        for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < (uint32_t)renderCommandFrameData.renderCommandContexts.size();) {
            // NOTE: the index is advanced at the end of the loop body
            const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
            PLUGIN_ASSERT(ref.multiRenderCommandListCount > 0);
            const uint32_t rcCount = ref.multiRenderCommandListCount;

            MultiRenderCommandListDesc mrcDesc;
            mrcDesc.multiRenderCommandListCount = rcCount;
            mrcDesc.baseContext = (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;

            for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                const uint32_t currIdx = cmdBufferIdx + rcIdx;
                mrcDesc.multiRenderCommandListIndex = rcIdx;
                RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                const DebugNames debugNames { ref2.debugName,
                    renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
            }
            cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
        }
    }
}

void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
{
    // these are validated in render graph
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.contextPoolMgr;

    ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();

    StateCache stateCache = {}; // state cache for this render command list
    stateCache.backendNode = renderCommandCtx.renderBackendNode;

    // the whole command buffer is protected with a single frame fence
    const bool multiRenderCommandList =
        (multiRenderCommandListDesc.multiRenderCommandListCount > 1 && multiRenderCommandListDesc.baseContext);
    const bool beginCommandBuffer =
        (!multiRenderCommandList || (multiRenderCommandListDesc.multiRenderCommandListIndex == 0));
    const bool endCommandBuffer =
        (!multiRenderCommandList || (multiRenderCommandListDesc.multiRenderCommandListIndex ==
                                        multiRenderCommandListDesc.multiRenderCommandListCount - 1));
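    // with a multi render command list render pass only the first list begins
    // and the last list ends the shared command buffer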
    const ContextCommandPoolVk* ptrCmdPool = nullptr;
    if (multiRenderCommandList) {
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(multiRenderCommandListDesc.baseContext->contextPoolMgr))
                          ->GetContextCommandPool();
    } else {
        ptrCmdPool = &((NodeContextPoolManagerVk&)contextPoolMgr).GetContextCommandPool();
    }
    PLUGIN_ASSERT(ptrCmdPool);
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
    const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
#endif
    PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
    PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
#endif

    if (beginCommandBuffer) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool->commandPool,                  // commandPool
            commandPoolResetFlags));                  // flags

        constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
            VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
        };
        const VkCommandBufferBeginInfo commandBufferBeginInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
            nullptr,                                     // pNext
            commandBufferUsageFlags,                     // flags
            nullptr,                                     // pInheritanceInfo
        };

        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
            PLUGIN_ASSERT(gpuQuery);

            gpuQuery->NextQueryIndex();

            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        }
#endif
        perfDataSet->cpuTimer.Begin();
#endif
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
        const VkDebugUtilsLabelEXT label {
            VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
            nullptr,                                 // pNext
            debugNames.renderCommandListName.data(), // pLabelName
            { 1.f, 1.f, 1.f, 1.f }                   // color[4]
        };
        deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
    }
#endif

    for (const auto& ref : rcRef) {
        PLUGIN_ASSERT(ref.rc);
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
            const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
            const VkDebugUtilsLabelEXT label {
                VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
                nullptr,                                 // pNext
                COMMAND_NAMES[index].data(),             // pLabelName
                { 0.87f, 0.83f, 0.29f, 1.f }             // color[4]
            };
            deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
        }
#endif

        switch (ref.type) {
            case RenderCommandType::BARRIER_POINT: {
                const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);

                // handle all barriers before render command that needs resource syncing
                RenderCommand(
                    barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
                break;
            }
            case RenderCommandType::DRAW: {
                RenderCommand(
                    *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH: {
                RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::UPDATE_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandUpdateDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
                RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_STENCIL: {
                RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
                RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            //
            case RenderCommandType::WRITE_TIMESTAMP: {
                RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::UNDEFINED:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            default: {
                PLUGIN_ASSERT(false && "non-valid render command");
                break;
            }
        }
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
            deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
        }
#endif
    }

    if (presentationInfo_.renderNodeCommandListIndex == cmdBufIdx) {
        RenderPresentationLayout(cmdBuffer);
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
        deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
    }
#endif

    if (endCommandBuffer) {
#if (RENDER_PERF_ENABLED == 1)
        perfDataSet->cpuTimer.End();
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
        }
#endif
        CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
#endif

        VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer

        commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
{
    const RenderHandle psoHandle = renderCmd.psoHandle;
    const VkPipelineBindPoint pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;

    stateCache.psoHandle = psoHandle;

    VkPipeline pipeline { VK_NULL_HANDLE };
    VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
    if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
        const ComputePipelineStateObjectVk* pso = static_cast<const ComputePipelineStateObjectVk*>(
            psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
        if (pso) {
            const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
            pipeline = plat.pipeline;
            pipelineLayout = plat.pipelineLayout;
        }
    } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
        PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
        if (stateCache.renderCommandBeginRenderPass) {
            uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
            if (stateCache.pipelineDescSetHash != 0) {
                HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
            }
            const GraphicsPipelineStateObjectVk* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
                psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
                    stateCache.renderCommandBeginRenderPass->subpasses,
                    stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
                    &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
            if (pso) {
                const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
                pipeline = plat.pipeline;
                pipelineLayout = plat.pipelineLayout;
            }
        }
    }
    PLUGIN_ASSERT(pipeline);
    PLUGIN_ASSERT(pipelineLayout);

    const bool valid = (pipeline != VK_NULL_HANDLE);
    if (valid) {
        stateCache.pipelineLayout = pipelineLayout;
        stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
        vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
            pipelineBindPoint,                  // pipelineBindPoint
            pipeline);                          // pipeline
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.bindPipelineCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    if (renderCmd.indexCount) {
        vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
            renderCmd.indexCount,              // indexCount
            renderCmd.instanceCount,           // instanceCount
            renderCmd.firstIndex,              // firstIndex
            renderCmd.vertexOffset,            // vertexOffset
            renderCmd.firstInstance);          // firstInstance
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.drawCount++;
        stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
        stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
    } else {
        vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
            renderCmd.vertexCount,      // vertexCount
            renderCmd.instanceCount,    // instanceCount
            renderCmd.firstVertex,      // firstVertex
            renderCmd.firstInstance);   // firstInstance
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.drawCount++;
        stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
        stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
            * renderCmd.instanceCount;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
        if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
            vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
                buffer,                                    // buffer
                offset,                                    // offset
                renderCmd.drawCount,                       // drawCount
                renderCmd.stride);                         // stride
        } else {
            vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
                buffer,                             // buffer
                offset,                             // offset (includes currentByteOffset like the indexed path)
                renderCmd.drawCount,                // drawCount
                renderCmd.stride);                  // stride
        }
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.drawIndirectCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.groupCountX,          // groupCountX
        renderCmd.groupCountY,          // groupCountY
        renderCmd.groupCountZ);         // groupCountZ
#if (RENDER_PERF_ENABLED == 1)
    stateCache.perfCounters.dispatchCount++;
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
        vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
            buffer,                                 // buffer
            offset);                                // offset
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.dispatchIndirectCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
    stateCache.renderCommandBeginRenderPass = &renderCmd;

    NodeContextPoolManagerVk& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
    // NOTE: the state cache could be optimized to store lowLevelRenderPassData in the multi render command list case
    stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);

    // early out for multi render command list render pass
    if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
        constexpr VkSubpassContents subpassContents { VkSubpassContents::VK_SUBPASS_CONTENTS_INLINE };
        vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
            subpassContents);                  // contents

        return; // early out
    }

    const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;

    VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
    bool hasClearValues = false;
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        const auto& ref = renderPassDesc.attachments[idx];
        if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
            ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
            const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
            VkClearValue clearValue;
            if (RenderHandleUtil::IsDepthImage(handle)) {
                PLUGIN_STATIC_ASSERT(sizeof(clearValue.depthStencil) == sizeof(ref.clearValue.depthStencil));
                clearValue.depthStencil.depth = ref.clearValue.depthStencil.depth;
                clearValue.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
            } else {
                PLUGIN_STATIC_ASSERT(sizeof(clearValue.color) == sizeof(ref.clearValue.color));
                if (!CloneData(&clearValue.color, sizeof(clearValue.color), &ref.clearValue.color,
                        sizeof(ref.clearValue.color))) {
                    PLUGIN_LOG_E("Copying of clearValue.color failed.");
                }
            }
            clearValues[idx] = clearValue;
            hasClearValues = true;
        }
    }

    // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
    // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
    const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;

    VkRect2D renderArea {
        { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
        { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
    };
    // render area needs to be inside the framebuffer
    const auto& lowLevelData = stateCache.lowLevelRenderPassData;
    renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
    renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
    renderArea.extent.width = Math::min(renderArea.extent.width,
        static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
    renderArea.extent.height = Math::min(renderArea.extent.height,
        static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1062 
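    // NOTE: e.g. with a 1280x720 framebuffer, a requested render area of offset (1000, 0) and
    // extent (512, 720) is clamped to extent (280, 720) so that offset + extent stays inside
    // the framebuffer.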
    const VkRenderPassBeginInfo renderPassBeginInfo {
        VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,      // sType
        nullptr,                                       // pNext
        stateCache.lowLevelRenderPassData.renderPass,  // renderPass
        stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
        renderArea,                                    // renderArea
        clearValueCount,                               // clearValueCount
        clearValues,                                   // pClearValues
    };

    const VkSubpassContents subpassContents = (VkSubpassContents)renderPassDesc.subpassContents;
    vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
        &renderPassBeginInfo,                  // pRenderPassBegin
        subpassContents);                      // contents
#if (RENDER_PERF_ENABLED == 1)
    stateCache.perfCounters.renderPassCount++;
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);

    const VkSubpassContents subpassContents = (VkSubpassContents)renderCmd.subpassContents;
    vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
        subpassContents);                  // contents
}

void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);

    // early out for multi render command list render pass
    if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
        return; // NOTE: only the subpass ends here; the owning command list ends the render pass
    }

    stateCache.renderCommandBeginRenderPass = nullptr;
    stateCache.lowLevelRenderPassData = {};

    vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
}

void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
    PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);

    const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;

    VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    const GpuBufferVk* gpuBuffer = nullptr;
    RenderHandle currBufferHandle;
    for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
        const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
        // our importer usually uses same GPU buffer for all vertex buffers in single primitive
        // do not re-fetch the buffer if not needed
        if (currBufferHandle.id != currVb.bufferHandle.id) {
            currBufferHandle = currVb.bufferHandle;
            gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
        }
        PLUGIN_ASSERT(gpuBuffer);
        if (gpuBuffer) {
            const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
            const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
            vertexBuffers[idx] = plat.buffer;
            offsets[idx] = offset;
        }
    }
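    // NOTE: plat.currentByteOffset accounts for dynamic ring buffer mapping (the currently
    // mapped frame slice of the GPU buffer); the user-given bufferOffset is added on top.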

    vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
        0,                                       // firstBinding
        vertexBufferCount,                       // bindingCount
        vertexBuffers,                           // pBuffers
        offsets);                                // pOffsets
}

void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);

    PLUGIN_ASSERT(gpuBuffer);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
        const VkIndexType indexType = (VkIndexType)renderCmd.indexBuffer.indexType;

        vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
            buffer,                                // buffer
            offset,                                // offset
            indexType);                            // indexType
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
    const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    if (srcImagePtr && dstImagePtr) {
        const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
        const GpuImagePlatformDataVk& dstPlatImage = dstImagePtr->GetPlatformData();

        const ImageBlit& ib = renderCmd.imageBlit;
        const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? srcPlatImage.arrayLayers
                                           : ib.srcSubresource.layerCount;
        const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? dstPlatImage.arrayLayers
                                           : ib.dstSubresource.layerCount;
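
        // NOTE: the GPU_IMAGE_ALL_LAYERS sentinel is expanded here to the image's actual
        // arrayLayers count before the blit is recorded.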

        const VkImageSubresourceLayers srcSubresourceLayers {
            (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
            ib.srcSubresource.mipLevel,                             // mipLevel
            ib.srcSubresource.baseArrayLayer,                       // baseArrayLayer
            srcLayerCount,                                          // layerCount
        };
        const VkImageSubresourceLayers dstSubresourceLayers {
            (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
            ib.dstSubresource.mipLevel,                             // mipLevel
            ib.dstSubresource.baseArrayLayer,                       // baseArrayLayer
            dstLayerCount,                                          // layerCount
        };

        const VkImageBlit imageBlit {
            srcSubresourceLayers, // srcSubresource

            { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
                { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
                    (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]

            dstSubresourceLayers, // dstSubresource

            { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
                { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
                    (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
        };

        vkCmdBlitImage(cmdBuf.commandBuffer,         // commandBuffer
            srcPlatImage.image,                      // srcImage
            (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout
            dstPlatImage.image,                      // dstImage
            (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
            1,                                       // regionCount
            &imageBlit,                              // pRegions
            (VkFilter)renderCmd.filter);             // filter
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
    const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);

    PLUGIN_ASSERT(srcGpuBuffer);
    PLUGIN_ASSERT(dstGpuBuffer);

    if (srcGpuBuffer && dstGpuBuffer) {
        const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
        const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
        const VkBufferCopy bufferCopy {
            renderCmd.bufferCopy.srcOffset,
            renderCmd.bufferCopy.dstOffset,
            renderCmd.bufferCopy.size,
        };

        if (bufferCopy.size > 0) {
            vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
                srcBuffer,                        // srcBuffer
                dstBuffer,                        // dstBuffer
                1,                                // regionCount
                &bufferCopy);                     // pRegions
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
        PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        return;
    }

    const GpuBufferVk* gpuBuffer = nullptr;
    const GpuImageVk* gpuImage = nullptr;
    if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
        gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
        gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    } else {
        gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
        gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
    }

    if (gpuBuffer && gpuImage) {
        const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
        const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
        const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
        const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                        ? platImage.arrayLayers
                                        : subresourceLayer.layerCount;
        const VkImageSubresourceLayers imageSubresourceLayer {
            (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
            subresourceLayer.mipLevel,
            subresourceLayer.baseArrayLayer,
            layerCount,
        };
        const GpuImageDesc& imageDesc = gpuImage->GetDesc();
        // Math::min to force staying inside image
        const uint32_t mip = subresourceLayer.mipLevel;
        const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
        const Size3D& imageOffset = bufferImageCopy.imageOffset;
        const VkExtent3D imageExtent = {
            Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
            Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
            Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
        };
        const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
                           (imageOffset.depth < imageSize.depth);
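        // NOTE: e.g. copying into mip 3 of a 1024x1024 image: imageSize is 128x128, and an
        // imageOffset of (64, 0) with a requested extent of 128x128 is clamped to 64x128;
        // an offset outside the mip size marks the copy invalid and it is skipped below.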
        const VkBufferImageCopy bufferImageCopyVk {
            bufferImageCopy.bufferOffset,
            bufferImageCopy.bufferRowLength,
            bufferImageCopy.bufferImageHeight,
            imageSubresourceLayer,
            { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
                static_cast<int32_t>(imageOffset.depth) },
            imageExtent,
        };

        const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
        const VkImage image = (gpuImage->GetPlatformData()).image;

        if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
            vkCmdCopyBufferToImage(cmdBuf.commandBuffer,             // commandBuffer
                buffer,                                              // srcBuffer
                image,                                               // dstImage
                VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
                1,                                                   // regionCount
                &bufferImageCopyVk);                                 // pRegions
        } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
            vkCmdCopyImageToBuffer(cmdBuf.commandBuffer,             // commandBuffer
                image,                                               // srcImage
                VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
                buffer,                                              // dstBuffer
                1,                                                   // regionCount
                &bufferImageCopyVk);                                 // pRegions
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
    const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    if (srcGpuImage && dstGpuImage) {
        const ImageCopy& copy = renderCmd.imageCopy;
        const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
        const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;

        const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
        const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
        const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? srcPlatImage.arrayLayers
                                           : srcSubresourceLayer.layerCount;
        const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? dstPlatImage.arrayLayers
                                           : dstSubresourceLayer.layerCount;

        const VkImageSubresourceLayers srcImageSubresourceLayer {
            (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
            srcSubresourceLayer.mipLevel,
            srcSubresourceLayer.baseArrayLayer,
            srcLayerCount,
        };
        const VkImageSubresourceLayers dstImageSubresourceLayer {
            (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
            dstSubresourceLayer.mipLevel,
            dstSubresourceLayer.baseArrayLayer,
            dstLayerCount,
        };

        const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
        const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();

        VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
        ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
        ext.height =
            Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
        ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
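        // NOTE: the extent is clamped against both images so that srcOffset + extent and
        // dstOffset + extent stay inside the src and dst image dimensions respectively.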

        const VkImageCopy imageCopyVk {
            srcImageSubresourceLayer,                                 // srcSubresource
            { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
            dstImageSubresourceLayer,                                 // dstSubresource
            { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
            ext,                                                      // extent
        };
        vkCmdCopyImage(cmdBuf.commandBuffer,                     // commandBuffer
            srcPlatImage.image,                                  // srcImage
            VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
            dstPlatImage.image,                                  // dstImage
            VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
            1,                                                   // regionCount
            &imageCopyVk);                                       // pRegions
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
    const RenderBarrierList& rbl)
{
    if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
        return;
    }

    const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
        rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
    PLUGIN_ASSERT(barrierPointBarriers);
    if (!barrierPointBarriers) {
        return;
    }
    constexpr uint32_t maxBarrierCount { 8 };
    VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
    VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
    VkMemoryBarrier memoryBarriers[maxBarrierCount];

    // generally there is only a single barrier list per barrier point;
    // with batched render passes there can be many
    // NOTE: all barrier lists could be patched to a single vk command if needed
    // NOTE: memory and pipeline barriers should be allowed on the front-end side
    const uint32_t barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
    const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
#if (RENDER_VALIDATION_ENABLED == 1)
    uint32_t fullBarrierCount = 0u;
#endif
    for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
        if (nextBarrierList == nullptr) { // cannot be null, just a safety
            PLUGIN_ASSERT(false);
            return;
        }
        const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
        nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
        const uint32_t barrierCount = (uint32_t)barrierListRef.count;

        uint32_t bufferBarrierIdx = 0;
        uint32_t imageBarrierIdx = 0;
        uint32_t memoryBarrierIdx = 0;

        VkPipelineStageFlags srcPipelineStageMask { 0 };
        VkPipelineStageFlags dstPipelineStageMask { 0 };
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };

        for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
            const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];

            uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
                srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
                dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
            }

            const RenderHandle resourceHandle = ref.resourceHandle;
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);

            PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
                          (handleType == RenderHandleType::GPU_IMAGE));

            const VkAccessFlags srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
            const VkAccessFlags dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);

            srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
            dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);

            // NOTE: zero size buffer barriers allowed ATM
            if (handleType == RenderHandleType::GPU_BUFFER) {
                const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle);
                PLUGIN_ASSERT(gpuBuffer);
                if (gpuBuffer) {
                    const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
                    // mapped currentByteOffset (dynamic ring buffer offset) taken into account
                    const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
                    const VkDeviceSize size =
                        Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
                            (VkDeviceSize)ref.dst.optionalByteSize);
                    bufferMemoryBarriers[bufferBarrierIdx++] = {
                        VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
                        nullptr,                                 // pNext
                        srcAccessMask,                           // srcAccessMask
                        dstAccessMask,                           // dstAccessMask
                        srcQueueFamilyIndex,                     // srcQueueFamilyIndex
                        dstQueueFamilyIndex,                     // dstQueueFamilyIndex
                        platBuffer.buffer,                       // buffer
                        offset,                                  // offset
                        size,                                    // size
                    };
                }
            } else if (handleType == RenderHandleType::GPU_IMAGE) {
                const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle);
                PLUGIN_ASSERT(gpuImage);
                if (gpuImage) {
                    const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();

                    const VkImageLayout srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
                    const VkImageLayout dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);

                    const VkImageAspectFlags imageAspectFlags =
                        (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
                            ? platImage.aspectFlags
                            : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;

                    const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
                                                    PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
                                                    ? platImage.mipLevels
                                                    : ref.src.optionalImageSubresourceRange.levelCount;
                    PLUGIN_ASSERT(levelCount <= platImage.mipLevels);

                    const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
                                                    PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                                    ? platImage.arrayLayers
                                                    : ref.src.optionalImageSubresourceRange.layerCount;
                    PLUGIN_ASSERT(layerCount <= platImage.arrayLayers);

                    const VkImageSubresourceRange imageSubresourceRange {
                        imageAspectFlags,                                     // aspectMask
                        ref.src.optionalImageSubresourceRange.baseMipLevel,   // baseMipLevel
                        levelCount,                                           // levelCount
                        ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
                        layerCount,                                           // layerCount
                    };

                    imageMemoryBarriers[imageBarrierIdx++] = {
                        VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
                        nullptr,                                // pNext
                        srcAccessMask,                          // srcAccessMask
                        dstAccessMask,                          // dstAccessMask
                        srcImageLayout,                         // oldLayout
                        dstImageLayout,                         // newLayout
                        srcQueueFamilyIndex,                    // srcQueueFamilyIndex
                        dstQueueFamilyIndex,                    // dstQueueFamilyIndex
                        platImage.image,                        // image
                        imageSubresourceRange,                  // subresourceRange
                    };
                }
            } else {
                memoryBarriers[memoryBarrierIdx++] = {
                    VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
                    nullptr,                          // pNext
                    srcAccessMask,                    // srcAccessMask
                    dstAccessMask,                    // dstAccessMask
                };
            }

            const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
            const bool resetBarriers = (bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
                                       (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1));
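
            // NOTE: barriers are flushed in batches: whenever any per-type array fills up
            // (maxBarrierCount entries) or the last barrier of the list has been handled,
            // a single vkCmdPipelineBarrier below records the batch and the indices reset.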
            if (hasBarriers && resetBarriers) {
#if (RENDER_VALIDATION_ENABLED == 1)
                fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
#endif
                vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
                    srcPipelineStageMask,                  // srcStageMask
                    dstPipelineStageMask,                  // dstStageMask
                    dependencyFlags,                       // dependencyFlags
                    memoryBarrierIdx,                      // memoryBarrierCount
                    memoryBarriers,                        // pMemoryBarriers
                    bufferBarrierIdx,                      // bufferMemoryBarrierCount
                    bufferMemoryBarriers,                  // pBufferMemoryBarriers
                    imageBarrierIdx,                       // imageMemoryBarrierCount
                    imageMemoryBarriers);                  // pImageMemoryBarriers

                bufferBarrierIdx = 0;
                imageBarrierIdx = 0;
                memoryBarrierIdx = 0;
            }
        }
    }
#if (RENDER_VALIDATION_ENABLED == 1)
    if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
        PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
            "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
            barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
    }
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandUpdateDescriptorSets& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
{
    // NOTE: could be changed to update all descriptor sets at once
    NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)ncdsm;

    for (uint32_t descIdx = 0; descIdx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++descIdx) {
        const RenderHandle descHandle = renderCmd.descriptorSetHandles[descIdx];
        if (RenderHandleUtil::GetHandleType(descHandle) != RenderHandleType::DESCRIPTOR_SET) {
            continue;
        }

        // first update gpu descriptor indices
        ncdsm.UpdateDescriptorSetGpuHandle(descHandle);

        // actual vulkan descriptor set update
        const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descHandle);

        if (descriptorSet) {
            const DescriptorSetLayoutBindingResources bindingResources = ncdsm.GetCpuDescriptorSetData(descHandle);
#if (RENDER_VALIDATION_ENABLED == 1)
            // get descriptor counts
            const LowLevelDescriptorCountsVk& descriptorCounts = aNcdsmVk.GetLowLevelDescriptorCounts(descHandle);
            if ((uint32_t)bindingResources.bindings.size() > descriptorCounts.writeDescriptorCount) {
                PLUGIN_LOG_E("RENDER_VALIDATION: update descriptor set bindings exceed descriptor set bindings");
            }
#endif
            const uint32_t bindingCount = Math::min(
                (uint32_t)bindingResources.bindings.size(), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT);
            const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;

            // max counts
            VkDescriptorBufferInfo descriptorBufferInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
            VkDescriptorImageInfo descriptorImageInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
            VkDescriptorImageInfo descriptorSamplerInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
#if (RENDER_VULKAN_RT_ENABLED == 1)
            VkWriteDescriptorSetAccelerationStructureKHR
                descriptorAccelInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
            uint32_t accelIndex = 0;
#endif
            VkWriteDescriptorSet writeDescriptorSet[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];

            const auto& buffers = bindingResources.buffers;
            const auto& images = bindingResources.images;
            const auto& samplers = bindingResources.samplers;
            uint32_t bufferIndex = 0;
            uint32_t imageIndex = 0;
            uint32_t samplerIndex = 0;
            uint32_t writeBindIdx = 0;
            for (const auto& ref : buffers) {
                const uint32_t descriptorCount = ref.binding.descriptorCount;
                // skip array bindings which are bound from the first index; they also have descriptorCount 0
                if (descriptorCount == 0) {
                    continue;
                }
                const uint32_t arrayOffset = ref.arrayOffset;
                PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
                if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
                    for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                        // the first resource is in ref; from index 1 onwards the array offset is used
                        const BindableBuffer& bRes =
                            (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
                        const GpuAccelerationStructureVk* accelPtr =
                            gpuResourceMgr_.GetAccelerationStructure<GpuAccelerationStructureVk>(bRes.handle);
                        if (accelPtr) {
                            const GpuAccelerationStructurePlatformDataVk& platAccel = accelPtr->GetPlatformData();
                            descriptorAccelInfos[accelIndex + idx] = {
                                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
                                nullptr,                                                           // pNext
                                descriptorCount,                  // accelerationStructureCount
                                &platAccel.accelerationStructure, // pAccelerationStructures
                            };
                        }
                    }
                    writeDescriptorSet[writeBindIdx++] = {
                        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
                        &descriptorAccelInfos[accelIndex],            // pNext
                        descriptorSet->descriptorSet,                 // dstSet
                        ref.binding.binding,                          // dstBinding
                        0,                                            // dstArrayElement
                        descriptorCount,                              // descriptorCount
                        (VkDescriptorType)ref.binding.descriptorType, // descriptorType
                        nullptr,                                      // pImageInfo
                        nullptr,                                      // pBufferInfo
                        nullptr,                                      // pTexelBufferView
                    };
                    accelIndex += descriptorCount;
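                    // NOTE: VkWriteDescriptorSetAccelerationStructureKHR is chained via pNext
                    // and its accelerationStructureCount must equal the write's descriptorCount;
                    // the write above references the struct at descriptorAccelInfos[accelIndex].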
#endif
                } else {
                    for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                        // the first resource is in ref; from index 1 onwards the array offset is used
                        const BindableBuffer& bRes =
                            (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
                        const VkDeviceSize optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
                        const GpuBufferVk* bufferPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle);
                        if (bufferPtr) {
                            const GpuBufferPlatformDataVk& platBuffer = bufferPtr->GetPlatformData();
                            // takes into account dynamic ring buffers with mapping
                            const VkDeviceSize bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
                            const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
                            const VkDeviceSize bufferRange =
                                Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
                                    (VkDeviceSize)bRes.byteSize);
                            descriptorBufferInfos[bufferIndex + idx] = {
                                platBuffer.buffer, // buffer
                                byteOffset,        // offset
                                bufferRange,       // range
                            };
                        }
                    }
                    writeDescriptorSet[writeBindIdx++] = {
                        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
                        nullptr,                                      // pNext
                        descriptorSet->descriptorSet,                 // dstSet
                        ref.binding.binding,                          // dstBinding
                        0,                                            // dstArrayElement
                        descriptorCount,                              // descriptorCount
                        (VkDescriptorType)ref.binding.descriptorType, // descriptorType
                        nullptr,                                      // pImageInfo
                        &descriptorBufferInfos[bufferIndex],          // pBufferInfo
                        nullptr,                                      // pTexelBufferView
                    };
                    bufferIndex += descriptorCount;
                }
            }
            for (const auto& ref : images) {
                const uint32_t descriptorCount = ref.binding.descriptorCount;
                // skip array bindings which are bound from the first index; they also have descriptorCount 0
                if (descriptorCount == 0) {
                    continue;
                }
                const VkDescriptorType descriptorType = (VkDescriptorType)ref.binding.descriptorType;
                const uint32_t arrayOffset = ref.arrayOffset;
                PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
                for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                    // the first resource is in ref; from index 1 onwards the array offset is used
                    const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].resource;
                    const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(bRes.handle);
                    if (imagePtr) {
                        VkSampler sampler = VK_NULL_HANDLE;
                        if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
                            const GpuSamplerVk* samplerPtr =
                                gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
                            if (samplerPtr) {
                                sampler = samplerPtr->GetPlatformData().sampler;
                            }
                        }
                        const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
                        const GpuImagePlatformDataViewsVk& platImageViews = imagePtr->GetPlatformDataViews();
                        VkImageView imageView = platImage.imageView;
                        if ((bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
                            (bRes.mip < platImageViews.mipImageViews.size())) {
                            imageView = platImageViews.mipImageViews[bRes.mip];
                        } else if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
                                   (bRes.layer < platImageViews.layerImageViews.size())) {
                            imageView = platImageViews.layerImageViews[bRes.layer];
                        }
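                        // NOTE: when a specific mip or layer is bound (not the ALL_* sentinel),
                        // the per-mip or per-layer view replaces the default whole-image view.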
                        descriptorImageInfos[imageIndex + idx] = {
                            sampler,                         // sampler
                            imageView,                       // imageView
                            (VkImageLayout)bRes.imageLayout, // imageLayout
                        };
                    }
                }
                writeDescriptorSet[writeBindIdx++] = {
                    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
                    nullptr,                                // pNext
                    descriptorSet->descriptorSet,           // dstSet
                    ref.binding.binding,                    // dstBinding
                    0,                                      // dstArrayElement
                    descriptorCount,                        // descriptorCount
                    descriptorType,                         // descriptorType
                    &descriptorImageInfos[imageIndex],      // pImageInfo
                    nullptr,                                // pBufferInfo
                    nullptr,                                // pTexelBufferView
                };
                imageIndex += descriptorCount;
            }
            for (const auto& ref : samplers) {
                const uint32_t descriptorCount = ref.binding.descriptorCount;
                // skip array bindings which are bound from the first index; they also have descriptorCount 0
                if (descriptorCount == 0) {
                    continue;
                }
                const uint32_t arrayOffset = ref.arrayOffset;
                PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
                for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                    // the first resource is in ref; from index 1 onwards the array offset is used
                    const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].resource;
                    const GpuSamplerVk* samplerPtr = gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.handle);
                    if (samplerPtr) {
                        const GpuSamplerPlatformDataVk& platSampler = samplerPtr->GetPlatformData();
                        descriptorSamplerInfos[samplerIndex + idx] = {
                            platSampler.sampler,      // sampler
                            VK_NULL_HANDLE,           // imageView
                            VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
                        };
                    }
                }
                writeDescriptorSet[writeBindIdx++] = {
                    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
                    nullptr,                                      // pNext
                    descriptorSet->descriptorSet,                 // dstSet
                    ref.binding.binding,                          // dstBinding
                    0,                                            // dstArrayElement
                    descriptorCount,                              // descriptorCount
                    (VkDescriptorType)ref.binding.descriptorType, // descriptorType
                    &descriptorSamplerInfos[samplerIndex],        // pImageInfo
                    nullptr,                                      // pBufferInfo
                    nullptr,                                      // pTexelBufferView
                };
                samplerIndex += descriptorCount;
            }
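            // NOTE: every binding with a non-zero descriptorCount produced exactly one
            // VkWriteDescriptorSet above, so writeBindIdx should match bindingCount here.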
            vkUpdateDescriptorSets(device, // device
                bindingCount,              // descriptorWriteCount
                writeDescriptorSet,        // pDescriptorWrites
                0,                         // descriptorCopyCount
                nullptr);                  // pDescriptorCopies
#if (RENDER_PERF_ENABLED == 1)
            stateCache.perfCounters.updateDescriptorSetCount++;
#endif
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
{
    const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;

    PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
    const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
                                                      ? VK_PIPELINE_BIND_POINT_COMPUTE
                                                      : VK_PIPELINE_BIND_POINT_GRAPHICS;
    const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;

    const bool valid = (pipelineLayout != VK_NULL_HANDLE);
    PLUGIN_ASSERT(valid); // render command list should enforce this
    const uint32_t firstSet = renderCmd.firstSet;
    const uint32_t setCount = renderCmd.setCount;
    if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
        uint32_t combinedDynamicOffsetCount = 0;
        uint32_t dynamicOffsetDescriptorSetIndices = 0;
        const uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;

        VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
        const uint32_t firstPlusCount = firstSet + setCount;
        for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
            const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
            if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
                const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
                dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
                combinedDynamicOffsetCount += dynamicDescriptorCount;

                const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
                if (descriptorSet) {
                    PLUGIN_ASSERT(descriptorSet->descriptorSet);
                    descriptorSets[idx] = descriptorSet->descriptorSet;
                    // update, copy to state cache
                    PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
                    stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
                    const uint32_t currShift = (idx * 16u);
                    const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
                    const uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
                    stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
                }
            }
        }
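        // NOTE: pipelineDescSetHash reserves a 16-bit slot per descriptor set (idx * 16 shift);
        // the previous slot is cleared and the set's immutableSamplerBitmask OR'd into the hash,
        // so a change against the prior hash triggers the PSO re-creation path below.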

        PLUGIN_ASSERT(combinedDynamicOffsetCount <= PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT);
        uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT];
        uint32_t dynamicOffsetIdx = 0;
        const uint32_t userDynamicOffsetCount = renderCmd.dynamicOffsetCount;
        for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
            if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
                const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
                const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
                const size_t dodResCount = dod.resources.size();
                for (size_t dodIdx = 0; dodIdx < dodResCount; ++dodIdx) {
#if (RENDER_VALIDATION_ENABLED == 1)
                    const GpuBuffer* gpuBuffer = gpuResourceMgr_.GetBuffer(dod.resources[dodIdx]);
                    PLUGIN_UNUSED(gpuBuffer);
                    PLUGIN_ASSERT(gpuBuffer);
#endif
                    uint32_t byteOffset = 0;
                    if (dynamicOffsetIdx < userDynamicOffsetCount) {
                        byteOffset += renderCmd.dynamicOffsets[dynamicOffsetIdx];
                    }
                    dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
                }
            }
        }
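        // NOTE: dynamic offsets are consumed in set and binding order; if the user supplied
        // fewer offsets than there are dynamic descriptors, the remaining offsets default to 0.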

        if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
            vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
                pipelineBindPoint,                        // pipelineBindPoint
                pipelineLayout,                           // layout
                firstSet,                                 // firstSet
                setCount,                                 // descriptorSetCount
                &descriptorSets[firstSet],                // pDescriptorSets
                dynamicOffsetIdx,                         // dynamicOffsetCount
                dynamicOffsets);                          // pDynamicOffsets
#if (RENDER_PERF_ENABLED == 1)
            stateCache.perfCounters.bindDescriptorSetCount++;
#endif
        } else {
            // immutable sampler mismatch: possible pso re-creation and re-bind of these sets to the new pso
            const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
                (PipelineBindPoint)pipelineBindPoint };
            RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
            RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
    PLUGIN_ASSERT(renderCmd.data);

    PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
    const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;

    const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0));
    PLUGIN_ASSERT(valid);

    if (valid) {
        const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
        vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
            pipelineLayout,                      // layout
            shaderStageFlags,                    // stageFlags
            0,                                   // offset
            renderCmd.pushConstant.byteSize,     // size
            static_cast<void*>(renderCmd.data)); // pValues
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_RT_ENABLED == 1)
    // NOTE: missing
    const GpuAccelerationStructureVk* dst =
        gpuResourceMgr_.GetAccelerationStructure<const GpuAccelerationStructureVk>(renderCmd.dstAccelerationStructure);
    const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
    if (dst && scratchBuffer) {
        const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
        const VkDevice device = devicePlat.device;

        const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformData();
        const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;

        // scratch data with user offset
        const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
                                            VkDeviceSize(renderCmd.scratchOffset) };
1924 
1925         const size_t arraySize =
1926             renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
1927         vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
1928         vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
1929 
1930         size_t arrayIndex = 0;
1931         for (const auto& trianglesRef : renderCmd.trianglesView) {
1932             geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
1933                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
1934                 nullptr,                                               // pNext
1935                 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
1936                 {},                                                    // geometry
1937                 0,                                                     // flags
1938             };
1939             uint32_t primitiveCount = 0;
1940             const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
1941             const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
1942             if (vb && ib) {
1943                 const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
1944                     device, vb->GetPlatformData().buffer) };
1945                 const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
1946                     device, ib->GetPlatformData().buffer) };
1947                 VkDeviceOrHostAddressConstKHR transformData {};
1948                 if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
1949                     if (const GpuBufferVk* tr =
1950                             gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
1951                         tr) {
1952                         transformData.deviceAddress = GetBufferDeviceAddress(device, tr->GetPlatformData().buffer);
1953                     }
1954                 }
1955                 primitiveCount = trianglesRef.info.indexCount / 3u; // triangles
1956 
1957                 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
1958                 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
1959                     VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
1960                     nullptr,                                                              // pNext
1961                     VkFormat(trianglesRef.info.vertexFormat),                             // vertexFormat
1962                     vertexData,                                                           // vertexData
1963                     VkDeviceSize(trianglesRef.info.vertexStride),                         // vertexStride
1964                     trianglesRef.info.maxVertex,                                          // maxVertex
1965                     VkIndexType(trianglesRef.info.indexType),                             // indexType
1966                     indexData,                                                            // indexData
1967                     transformData,                                                        // transformData
1968                 };
1969             }
1970             buildRangeInfos[arrayIndex] = {
1971                 primitiveCount, // primitiveCount
1972                 0u,             // primitiveOffset
1973                 0u,             // firstVertex
1974                 0u,             // transformOffset
1975             };
1976             arrayIndex++;
1977         }
1978         for (const auto& aabbsRef : renderCmd.aabbsView) {
1979             geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
1980                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
1981                 nullptr,                                               // pNext
1982                 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
1983                 {},                                                    // geometry
1984                 0,                                                     // flags
1985             };
1986             VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
1987             const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle);
1988             if (iPtr) {
1989                 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
1990             }
1991             geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
1992             geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
1993                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
1994                 nullptr,                                                          // pNext
1995                 deviceAddress,                                                    // data
1996                 aabbsRef.info.stride,                                             // stride
1997             };
1998             buildRangeInfos[arrayIndex] = {
1999                 1u, // primitiveCount
2000                 0u, // primitiveOffset
2001                 0u, // firstVertex
2002                 0u, // transformOffset
2003             };
2004             arrayIndex++;
2005         }
2006         for (const auto& instancesRef : renderCmd.instancesView) {
2007             geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2008                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2009                 nullptr,                                               // pNext
2010                 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
2011                 {},                                                    // geometry
2012                 0,                                                     // flags
2013             };
2014             VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2015             const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle);
2016             if (iPtr) {
2017                 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2018             }
2019             geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2020             geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2021                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2022                 nullptr,                                                              // pNext
2023                 instancesRef.info.arrayOfPointers,                                    // arrayOfPointers
2024                 deviceAddress,                                                        // data
2025             };
2026             buildRangeInfos[arrayIndex] = {
2027                 1u, // primitiveCount
2028                 0u, // primitiveOffset
2029                 0u, // firstVertex
2030                 0u, // transformOffset
2031             };
2032             arrayIndex++;
2033         }
2034 
2035         const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2036             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2037             nullptr,                                                          // pNext
2038             VkAccelerationStructureTypeKHR(renderCmd.type),                   // type
2039             VkBuildAccelerationStructureFlagsKHR(renderCmd.flags),            // flags
2040             VkBuildAccelerationStructureModeKHR(renderCmd.mode),              // mode
2041             VK_NULL_HANDLE,                                                   // srcAccelerationStructure
2042             dstAs,                                                            // dstAccelerationStructure
2043             uint32_t(arrayIndex),                                             // geometryCount
2044             geometryData.data(),                                              // pGeometries
2045             nullptr,                                                          // ppGeometries
2046             scratchData,                                                      // scratchData
2047         };
2048 
2049         vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2050         for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2051             buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2052         }
2053         const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2054         if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2055             extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2056                 1u,                                                                // infoCount
2057                 &buildGeometryInfo,                                                // pInfos
2058                 buildRangeInfosPtr.data());                                        // ppBuildRangeInfos
2059         }
2060     }
2061 #endif
2062 }
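// NOTE (sketch, engine wiring assumed): renderCmd.scratchBuffer must be created with
// VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT for GetBufferDeviceAddress() to be valid, and its size
// (minus scratchOffset) must cover what the driver reports for the same geometry, e.g.:
//   VkAccelerationStructureBuildSizesInfoKHR sizes {
//       VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR };
//   extFunctions.vkGetAccelerationStructureBuildSizesKHR(device,
//       VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildGeometryInfo,
//       maxPrimitiveCounts, &sizes); // sizes.buildScratchSize -> required scratch bytes
// (whether ExtFunctions exposes vkGetAccelerationStructureBuildSizesKHR is an assumption here)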
2063 
2064 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
2065     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2066     const StateCache& stateCache)
2067 {
2068     const ViewportDesc& vd = renderCmd.viewportDesc;
2069 
2070     const VkViewport viewport {
2071         vd.x,        // x
2072         vd.y,        // y
2073         vd.width,    // width
2074         vd.height,   // height
2075         vd.minDepth, // minDepth
2076         vd.maxDepth, // maxDepth
2077     };
2078 
2079     vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
2080         0,                                 // firstViewport
2081         1,                                 // viewportCount
2082         &viewport);                        // pViewports
2083 }
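// NOTE: this (and the scissor command below) only takes effect when the bound PSO was created with
// the matching dynamic state enabled (VK_DYNAMIC_STATE_VIEWPORT / VK_DYNAMIC_STATE_SCISSOR).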
2084 
2085 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
2086     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2087     const StateCache& stateCache)
2088 {
2089     const ScissorDesc& sd = renderCmd.scissorDesc;
2090 
2091     const VkRect2D scissor {
2092         { sd.offsetX, sd.offsetY },          // offset
2093         { sd.extentWidth, sd.extentHeight }, // extent
2094     };
2095 
2096     vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
2097         0,                                // firstScissor
2098         1,                                // scissorCount
2099         &scissor);                        // pScissors
2100 }
2101 
2102 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
2103     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2104     const StateCache& stateCache)
2105 {
2106     vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
2107         renderCmd.lineWidth);               // lineWidth
2108 }
2109 
2110 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
2111     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2112     const StateCache& stateCache)
2113 {
2114     vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
2115         renderCmd.depthBiasConstantFactor,  // depthBiasConstantFactor
2116         renderCmd.depthBiasClamp,           // depthBiasClamp
2117         renderCmd.depthBiasSlopeFactor);    // depthBiasSlopeFactor
2118 }
2119 
2120 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
2121     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2122     const StateCache& stateCache)
2123 {
2124     vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
2125         renderCmd.blendConstants);               // blendConstants[4]
2126 }
2127 
2128 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
2129     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2130     const StateCache& stateCache)
2131 {
2132     vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
2133         renderCmd.minDepthBounds,             // minDepthBounds
2134         renderCmd.maxDepthBounds);            // maxDepthBounds
2135 }
2136 
2137 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
2138     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2139     const StateCache& stateCache)
2140 {
2141     const VkStencilFaceFlags stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;
2142 
2143     if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2144         vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
2145             stencilFaceMask,                             // faceMask
2146             renderCmd.mask);                             // compareMask
2147     } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2148         vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
2149             stencilFaceMask,                           // faceMask
2150             renderCmd.mask);                           // writeMask
2151     } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2152         vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
2153             stencilFaceMask,                           // faceMask
2154             renderCmd.mask);                           // reference
2155     }
2156 }
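// NOTE (illustrative): a renderCmd with dynamicState = REFERENCE, mask = 0x01 and a faceMask
// covering both faces records, in effect:
//   vkCmdSetStencilReference(cmdBuf.commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0x01);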
2157 
2158 void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
2159     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2160     const StateCache& stateCache)
2161 {
2162     if (stateCache.backendNode) {
2163         const RenderBackendRecordingStateVk recordingState = {
2164             {},
2165             cmdBuf.commandBuffer,                              // commandBuffer
2166             stateCache.lowLevelRenderPassData.renderPass,      // renderPass
2167             stateCache.lowLevelRenderPassData.framebuffer,     // framebuffer
2168             stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2169             stateCache.lowLevelRenderPassData.subpassIndex,    // subpassIndex
2170             stateCache.pipelineLayout,                         // pipelineLayout
2171         };
2172         const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2173         stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
2174     }
2175 }
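// NOTE: recordingState exposes the raw VkCommandBuffer plus the active render pass, framebuffer
// and pipeline layout, so the user's backend node can record native Vulkan commands inside the
// current subpass; the node is expected to leave the command buffer in a recordable state.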
2176 
2177 void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2178     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2179 {
2180     PLUGIN_ASSERT_MSG(false, "not implemented");
2181 
2182     const VkPipelineStageFlagBits pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
2183     const uint32_t queryIndex = renderCmd.queryIndex;
2184     VkQueryPool queryPool = VK_NULL_HANDLE;
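    // NOTE: placeholder only: queryPool stays VK_NULL_HANDLE here, so the
    // vkCmdResetQueryPool/vkCmdWriteTimestamp calls below are not valid to record until a real
    // query pool is wired in (WritePerfTimeStamp shows the working path).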
2185 
2186     vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2187         queryPool,                            // queryPool
2188         queryIndex,                           // firstQuery
2189         1);                                   // queryCount
2190 
2191     vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
2192         pipelineStageFlagBits,                // pipelineStage
2193         queryPool,                            // queryPool
2194         queryIndex);                          // query
2195 }
2196 
2197 void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf)
2198 {
2199     PLUGIN_ASSERT(presentationInfo_.presentationLayoutChangeNeeded);
2200     PLUGIN_ASSERT(presentationInfo_.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR);
2201 
2202     const GpuResourceState& state = presentationInfo_.renderGraphProcessedState;
2203     const VkAccessFlags srcAccessMask = (VkAccessFlags)state.accessFlags;
2204     const VkAccessFlags dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
2205     const VkPipelineStageFlags srcStageMask =
2206         ((VkPipelineStageFlags)state.pipelineStageFlags) | (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
2207     const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
2208     const VkImageLayout oldLayout = (VkImageLayout)presentationInfo_.imageLayout;
2209     const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2210     // NOTE: queue is not currently checked (should be in the same queue as last time used)
2211     constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2212     constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2213     constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
2214     constexpr VkImageSubresourceRange imageSubresourceRange {
2215         VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
2216         0,                                                // baseMipLevel
2217         1,                                                // levelCount
2218         0,                                                // baseArrayLayer
2219         1,                                                // layerCount
2220     };
2221 
2222     const VkImageMemoryBarrier imageMemoryBarrier {
2223         VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
2224         nullptr,                                // pNext
2225         srcAccessMask,                          // srcAccessMask
2226         dstAccessMask,                          // dstAccessMask
2227         oldLayout,                              // oldLayout
2228         newLayout,                              // newLayout
2229         srcQueueFamilyIndex,                    // srcQueueFamilyIndex
2230         dstQueueFamilyIndex,                    // dstQueueFamilyIndex
2231         presentationInfo_.swapchainImage,       // image
2232         imageSubresourceRange,                  // subresourceRange
2233     };
2234 
2235     vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
2236         srcStageMask,                          // srcStageMask
2237         dstStageMask,                          // dstStageMask
2238         dependencyFlags,                       // dependencyFlags
2239         0,                                     // memoryBarrierCount
2240         nullptr,                               // pMemoryBarriers
2241         0,                                     // bufferMemoryBarrierCount
2242         nullptr,                               // pBufferMemoryBarriers
2243         1,                                     // imageMemoryBarrierCount
2244         &imageMemoryBarrier);                  // pImageMemoryBarriers
2245 
2246     presentationInfo_.presentationLayoutChangeNeeded = false;
2247     presentationInfo_.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2248 }
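// NOTE: vkQueuePresentKHR requires the swapchain image to be in VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
// which is what the barrier above transitions to; TOP_OF_PIPE is OR'd into srcStageMask because a
// pipeline barrier stage mask must not be zero even when the processed state carries no stage bits.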
2249 
2250 #if (RENDER_PERF_ENABLED == 1)
2251 
2252 void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
2253 {
2254     for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2255         const string_view& debugName = renderCommandContext.debugName;
2256         if (timers_.count(debugName) == 0) { // new timers
2257 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2258             PerfDataSet& perfDataSet = timers_[debugName];
2259             constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2260             perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
2261             constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
2262             perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
2263 #else
2264             timers_.insert({ debugName, {} });
2265 #endif
2266         }
2267     }
2268 
2269 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2270     perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
2271     perfGpuTimerData_.currentOffset =
2272         (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
2273 #endif
2274 }
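// NOTE (illustrative): the offset arithmetic above walks a ring buffer: e.g. with
// frameByteSize = 1024 and fullByteSize = 3 * 1024 (triple buffering assumed), currentOffset
// cycles 0 -> 1024 -> 2048 -> 0, so new query results never land in a region the CPU may still read.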
2275 
2276 void RenderBackendVk::EndFrameTimers()
2277 {
2278 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2279     perfGpuTimerData_.gpuBuffer->Unmap();
2280 #endif
2281     if (IPerformanceDataManagerFactory* globalPerfData =
2282             GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2283         globalPerfData) {
2284         IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
2285         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2286         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2287         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2288         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2289         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2290     }
2291 }
2292 
2293 void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
2294     const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits)
2295 {
2296 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2297     PLUGIN_ASSERT(timers_.count(name) == 1);
2298     const PerfDataSet* perfDataSet = &timers_[name];
2299 
2300     const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
2301     PLUGIN_ASSERT(gpuQuery);
2302 
2303     const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2304     PLUGIN_ASSERT(platData.queryPool);
2305 
2306     vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2307         platData.queryPool,                   // queryPool
2308         queryIndex,                           // firstQuery
2309         1);                                   // queryCount
2310 
2311     vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer,
2312         stageFlagBits,                        // pipelineStage,
2313         platData.queryPool,                   // queryPool,
2314         queryIndex);                          // query
2315 #endif
2316 }
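// NOTE: timestamps are written as begin/end pairs per timer (TIME_STAMP_PER_GPU_QUERY slots,
// presumably 2): e.g. queryIndex 0 before a node's work and queryIndex 1 after it;
// CopyPerfTimeStamp then reads both values back as a single duration.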
2317 
2318 namespace {
2319 void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
2320 {
2321     perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
2322     perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
2323     perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
2324     perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
2325     perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
2326     perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
2327     perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
2328     perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
2329     perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
2330     perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
2331 }
2332 } // namespace
2333 
2334 void RenderBackendVk::CopyPerfTimeStamp(
2335     const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& cache)
2336 {
2337     PLUGIN_ASSERT(timers_.count(name) == 1);
2338     const PerfDataSet* perfDataSet = &timers_[name];
2339 
2340 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2341     // read back the results of earlier frames' queries from the mapped buffer on the CPU,
2342     // then record a copy of this frame's query results from the pool into the GPU ring buffer
2343     const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
2344     int64_t gpuMicroSeconds = 0;
2345     {
2346         auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
2347         auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);
2348 
2349         const uint64_t startStamp = *currentData;
2350         const uint64_t endStamp = *(currentData + 1);
2351 
2352         const double timestampPeriod =
2353             static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
2354                                     .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
2355         constexpr int64_t nanosToMicrosDivisor { 1000 };
2356         gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
2357         constexpr int64_t maxValidMicroSecondValue { 4294967295 };
2358         if (gpuMicroSeconds > maxValidMicroSecondValue) {
2359             gpuMicroSeconds = 0;
2360         }
2361     }
2362 #endif
2363     const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();
2364 
2365     if (IPerformanceDataManagerFactory* globalPerfData =
2366             GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2367         globalPerfData) {
2368         IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2369 
2370         perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2371 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2372         perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2373 
2374         const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
2375         PLUGIN_ASSERT(gpuQuery);
2376 
2377         const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2378 
2379         const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
2380         PLUGIN_ASSERT(gpuBuffer);
2381         const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
2382 
2383         constexpr uint32_t queryCount = 2;
2384         constexpr VkDeviceSize queryStride = sizeof(uint64_t);
2385         constexpr VkQueryResultFlags queryResultFlags =
2386             VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;
2387 
2388         vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
2389             platData.queryPool,                         // queryPool
2390             0,                                          // firstQuery
2391             queryCount,                                 // queryCount
2392             platBuffer.buffer,                          // dstBuffer
2393             currentFrameByteOffset,                     // dstOffset
2394             queryStride,                                // stride
2395             queryResultFlags);                          // flags
2396 #endif
2397         UpdatePerfCounters(*perfData, name, cache.perfCounters);
2398     }
2399 }
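// NOTE (worked example): query values are GPU ticks and limits.timestampPeriod is nanoseconds per
// tick, so with timestampPeriod = 1.0 (common on desktop GPUs) and endStamp - startStamp = 5000000
// ticks, the node spent 5000000 ns = 5000 us of GPU time.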
2400 
2401 #endif
2402 RENDER_END_NAMESPACE()
2403