1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_backend_vk.h"
17
18 #include <cstdint>
19 #include <functional>
20 #include <vulkan/vulkan_core.h>
21
22 #include <base/containers/array_view.h>
23 #include <base/containers/fixed_string.h>
24 #include <base/containers/string_view.h>
25 #include <core/implementation_uids.h>
26 #include <core/perf/intf_performance_data_manager.h>
27 #include <core/plugin/intf_class_register.h>
28 #include <render/datastore/render_data_store_render_pods.h>
29 #include <render/device/pipeline_state_desc.h>
30 #include <render/namespace.h>
31 #include <render/nodecontext/intf_render_backend_node.h>
32 #include <render/vulkan/intf_device_vk.h>
33
34 #include "perf/cpu_perf_scope.h"
35 #if (RENDER_PERF_ENABLED == 1)
36 #include "perf/gpu_query.h"
37 #include "perf/gpu_query_manager.h"
38 #include "vulkan/gpu_query_vk.h"
39 #endif
40
41 #include "device/gpu_resource_handle_util.h"
42 #include "device/gpu_resource_manager.h"
43 #include "nodecontext/node_context_descriptor_set_manager.h"
44 #include "nodecontext/node_context_pool_manager.h"
45 #include "nodecontext/node_context_pso_manager.h"
46 #include "nodecontext/render_barrier_list.h"
47 #include "nodecontext/render_command_list.h"
48 #include "nodecontext/render_node_graph_node_store.h"
49 #include "render_backend.h"
50 #include "util/log.h"
51 #include "util/render_frame_util.h"
52 #include "vulkan/gpu_buffer_vk.h"
53 #include "vulkan/gpu_image_vk.h"
54 #include "vulkan/gpu_sampler_vk.h"
55 #include "vulkan/gpu_semaphore_vk.h"
56 #include "vulkan/node_context_descriptor_set_manager_vk.h"
57 #include "vulkan/node_context_pool_manager_vk.h"
58 #include "vulkan/pipeline_state_object_vk.h"
59 #include "vulkan/render_frame_sync_vk.h"
60 #include "vulkan/swapchain_vk.h"
61 #include "vulkan/validate_vk.h"
62
63 using namespace BASE_NS;
64
65 using CORE_NS::GetInstance;
66 using CORE_NS::IParallelTaskQueue;
67 using CORE_NS::IPerformanceDataManager;
68 using CORE_NS::IPerformanceDataManagerFactory;
69 using CORE_NS::ITaskQueueFactory;
70 using CORE_NS::IThreadPool;
71
72 RENDER_BEGIN_NAMESPACE()
73 namespace {
74 #if (RENDER_VULKAN_RT_ENABLED == 1)
75 inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
76 {
77 const VkBufferDeviceAddressInfo addressInfo {
78 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
79 nullptr, // pNext
80 buffer, // buffer
81 };
82 return vkGetBufferDeviceAddress(device, &addressInfo);
83 }
84 #endif
85 #if (RENDER_PERF_ENABLED == 1)
86 void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
87 {
88 dst.drawCount += src.drawCount;
89 dst.drawIndirectCount += src.drawIndirectCount;
90 dst.dispatchCount += src.dispatchCount;
91 dst.dispatchIndirectCount += src.dispatchIndirectCount;
92 dst.bindPipelineCount += src.bindPipelineCount;
93 dst.renderPassCount += src.renderPassCount;
94 dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
95 dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
96 dst.triangleCount += src.triangleCount;
97 dst.instanceCount += src.instanceCount;
98 }
99 #endif
100 } // namespace
101
102 // Helper class for running std::function as a ThreadPool task.
103 class FunctionTask final : public IThreadPool::ITask {
104 public:
105 static Ptr Create(std::function<void()> func)
106 {
107 return Ptr { new FunctionTask(BASE_NS::move(func)) };
108 }
109
110 explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {};
111
112 void operator()() override
113 {
114 func_();
115 }
116
117 protected:
118 void Destroy() override
119 {
120 delete this;
121 }
122
123 private:
124 std::function<void()> func_;
125 };
126
127 #if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
128 namespace {
129 static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
130 }
131 #endif
132
133 RenderBackendVk::RenderBackendVk(Device& dev, GpuResourceManager& gpuResourceManager, CORE_NS::ITaskQueue* const queue)
134 : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
135 queue_(queue)
136 {
137 #if (RENDER_PERF_ENABLED == 1)
138 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
139 gpuQueryMgr_ = make_unique<GpuQueryManager>();
140
141 constexpr uint32_t maxQueryObjectCount { 512u };
142 constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
143 const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
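// perf timer readback buffer: two timestamps (begin/end) per query object, replicated for every buffered frame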
144 const GpuBufferDesc desc {
145 BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT, // usageFlags
146 CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
147 0, // engineCreationFlags
148 fullByteSize, // byteSize
149 };
150 perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
151 perfGpuTimerData_.currentOffset = 0;
152 perfGpuTimerData_.frameByteSize = byteSize;
153 perfGpuTimerData_.fullByteSize = fullByteSize;
154 { // zero initialize
155 uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
156 memset_s(bufferData, fullByteSize, 0, fullByteSize);
157 perfGpuTimerData_.gpuBuffer->Unmap();
158 }
159 #endif
160 #endif
161 }
162
163 void RenderBackendVk::AcquirePresentationInfo(
164 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
165 {
166 RENDER_CPU_PERF_SCOPE("AcquirePresentationInfo", "");
167 if (device_.HasSwapchain()) {
168 presentationData_.present = true;
169 // resized to the same size for convenience
170 presentationData_.infos.resize(backBufferConfig.swapchainData.size());
171 for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
172 const auto& swapData = backBufferConfig.swapchainData[swapIdx];
173 PresentationInfo pi;
174 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
175
176 if (const auto* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
177 swapchain) {
178 const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
179 const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
180 const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
181 PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
182 pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
183 pi.swapchain = platSwapchain.swapchain;
184 pi.useSwapchain = true;
185 // NOTE: for legacy default backbuffer reasons there might be the same swapchain multiple times ATM
186 for (const auto& piRef : presentationData_.infos) {
187 if (piRef.swapchain == pi.swapchain) {
188 pi.useSwapchain = false;
189 }
190 }
191 // NOTE: do not re-acquire the default backbuffer swapchain if it's in use with a different handle
192 if (pi.useSwapchain) {
193 const VkResult result = vkAcquireNextImageKHR(device, // device
194 vkSwapchain, // swapchain
195 UINT64_MAX, // timeout
196 pi.swapchainSemaphore, // semaphore
197 (VkFence) nullptr, // fence
198 &pi.swapchainImageIndex); // pImageIndex
199
200 switch (result) {
201 // Success
202 case VK_SUCCESS:
203 case VK_TIMEOUT:
204 case VK_NOT_READY:
205 case VK_SUBOPTIMAL_KHR:
206 pi.validAcquire = true;
207 break;
208
209 // Failure
210 case VK_ERROR_OUT_OF_HOST_MEMORY:
211 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
212 PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
213 return;
214 case VK_ERROR_DEVICE_LOST:
215 PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
216 return;
217 case VK_ERROR_OUT_OF_DATE_KHR:
218 PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
219 return;
220 case VK_ERROR_SURFACE_LOST_KHR:
221 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
222 return;
223
224 case VK_EVENT_SET:
225 case VK_EVENT_RESET:
226 case VK_INCOMPLETE:
227 case VK_ERROR_INITIALIZATION_FAILED:
228 case VK_ERROR_MEMORY_MAP_FAILED:
229 case VK_ERROR_LAYER_NOT_PRESENT:
230 case VK_ERROR_EXTENSION_NOT_PRESENT:
231 case VK_ERROR_FEATURE_NOT_PRESENT:
232 case VK_ERROR_INCOMPATIBLE_DRIVER:
233 case VK_ERROR_TOO_MANY_OBJECTS:
234 case VK_ERROR_FORMAT_NOT_SUPPORTED:
235 case VK_ERROR_FRAGMENTED_POOL:
236 case VK_ERROR_OUT_OF_POOL_MEMORY:
237 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
238 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
239 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
240 case VK_ERROR_VALIDATION_FAILED_EXT:
241 case VK_ERROR_INVALID_SHADER_NV:
242 // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
243 case VK_ERROR_FRAGMENTATION_EXT:
244 case VK_ERROR_NOT_PERMITTED_EXT:
245 // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
246 case VK_RESULT_MAX_ENUM:
247 default:
248 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
249 PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
250 device_.SetDeviceStatus(false);
251 break;
252 }
253
254 if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
255 PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
256 static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
257 }
258
259 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
260 const RenderHandle handle = swapchainData.remappableSwapchainImage;
261 if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
262 // remap image to backbuffer
263 const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
264 // special swapchain remapping
265 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
266 }
267 pi.renderGraphProcessedState = swapData.backBufferState;
268 pi.imageLayout = swapData.layout;
269 if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
270 pi.presentationLayoutChangeNeeded = true;
271 pi.renderNodeCommandListIndex =
272 static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);
273
274 const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
275 PLUGIN_ASSERT(swapImage);
276 pi.swapchainImage = swapImage->GetPlatformData().image;
277 }
278 }
279 }
280 presentationData_.infos[swapIdx] = pi;
281 }
282 }
283 }
284
285 void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
286 {
287 if (!queue_) {
288 return;
289 }
290 if (!backBufferConfig.swapchainData.empty()) {
291 if (device_.HasSwapchain() && presentationData_.present) {
292 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
293 uint32_t swapchainCount = 0U;
294 VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
295 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
296 uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
297 for (const auto& presRef : presentationData_.infos) {
298 // NOTE: the default backbuffer might appear multiple times in the list
299 // the flag useSwapchain should be false in these cases
300 if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
301 PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
302 vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
303 vkSwapchains[swapchainCount++] = presRef.swapchain;
304 }
305 }
306 #if (RENDER_PERF_ENABLED == 1)
307 commonCpuTimers_.present.Begin();
308 #endif
309
310 // NOTE: currently waits for the last valid submission semaphore (backtraces here for valid
311 // semaphore)
312 if (swapchainCount > 0U) {
313 VkSemaphore waitSemaphore = VK_NULL_HANDLE;
314 uint32_t waitSemaphoreCount = 0;
315 if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
316 waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
317 waitSemaphoreCount = 1;
318 }
319
320 const VkPresentInfoKHR presentInfo {
321 VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
322 nullptr, // pNext
323 waitSemaphoreCount, // waitSemaphoreCount
324 &waitSemaphore, // pWaitSemaphores
325 swapchainCount, // swapchainCount
326 vkSwapchains, // pSwapchains
327 vkSwapImageIndices, // pImageIndices
328 nullptr // pResults
329 };
330
331 const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
332 const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
333 &presentInfo); // pPresentInfo
334
335 switch (result) {
336 // Success
337 case VK_SUCCESS:
338 break;
339 case VK_SUBOPTIMAL_KHR:
340 #if (RENDER_VALIDATION_ENABLED == 1)
341 PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
342 #endif
343 break;
344
345 // Failure
346 case VK_ERROR_OUT_OF_HOST_MEMORY:
347 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
348 PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
349 return;
350 case VK_ERROR_DEVICE_LOST:
351 PLUGIN_LOG_E("vkQueuePresentKHR device lost");
352 return;
353 case VK_ERROR_OUT_OF_DATE_KHR:
354 PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
355 return;
356 case VK_ERROR_SURFACE_LOST_KHR:
357 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
358 return;
359
360 case VK_NOT_READY:
361 case VK_TIMEOUT:
362 case VK_EVENT_SET:
363 case VK_EVENT_RESET:
364 case VK_INCOMPLETE:
365 case VK_ERROR_INITIALIZATION_FAILED:
366 case VK_ERROR_MEMORY_MAP_FAILED:
367 case VK_ERROR_LAYER_NOT_PRESENT:
368 case VK_ERROR_EXTENSION_NOT_PRESENT:
369 case VK_ERROR_FEATURE_NOT_PRESENT:
370 case VK_ERROR_INCOMPATIBLE_DRIVER:
371 case VK_ERROR_TOO_MANY_OBJECTS:
372 case VK_ERROR_FORMAT_NOT_SUPPORTED:
373 case VK_ERROR_FRAGMENTED_POOL:
374 case VK_ERROR_OUT_OF_POOL_MEMORY:
375 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
376 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
377 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
378 case VK_ERROR_VALIDATION_FAILED_EXT:
379 case VK_ERROR_INVALID_SHADER_NV:
380 case VK_ERROR_FRAGMENTATION_EXT:
381 case VK_ERROR_NOT_PERMITTED_EXT:
382 case VK_RESULT_MAX_ENUM:
383 default:
384 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
385 PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
386 break;
387 }
388 }
389 #if (RENDER_PERF_ENABLED == 1)
390 commonCpuTimers_.present.End();
391 #endif
392 } else {
393 #if (RENDER_VALIDATION_ENABLED == 1)
394 PLUGIN_LOG_ONCE_E(
395 "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
396 #endif
397 }
398 }
399 }
400
401 void RenderBackendVk::Render(
402 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
403 {
404 if (!queue_) {
405 return;
406 }
407
408 // NOTE: all command lists are validated before entering here
409 #if (RENDER_PERF_ENABLED == 1)
410 commonCpuTimers_.full.Begin();
411 commonCpuTimers_.acquire.Begin();
412 #endif
413
414 commandBufferSubmitter_ = {};
415 commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());
416
417 presentationData_.present = false;
418 presentationData_.infos.clear();
419
420 #if (RENDER_PERF_ENABLED == 1)
421 commonCpuTimers_.acquire.End();
422
423 StartFrameTimers(renderCommandFrameData);
424 commonCpuTimers_.execute.Begin();
425 #endif
426
427 // global begin backend frame
428 auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
429 descriptorSetMgr.BeginBackendFrame();
430
431 // command list process loop/execute
432 // first tries to acquire the swapchain in a task if needed
433 RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);
434
435 #if (RENDER_PERF_ENABLED == 1)
436 commonCpuTimers_.execute.End();
437 commonCpuTimers_.submit.Begin();
438 #endif
439
440 PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
441 // submit vulkan command buffers
442 // checks that presentation info has valid acquire
443 RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);
444
445 #if (RENDER_PERF_ENABLED == 1)
446 commonCpuTimers_.submit.End();
447 commonCpuTimers_.full.End();
448 EndFrameTimers();
449 #endif
450 }
451
452 void RenderBackendVk::RenderProcessSubmitCommandLists(
453 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
454 {
455 // NOTE: currently backtraces to final valid command buffer semaphore
456 uint32_t finalCommandBufferSubmissionIndex = ~0u;
457 commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
458 bool swapchainSemaphoreWaited = false;
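// walk the contexts backwards to find the last recorded command buffer with a signaling semaphore;
// that submission later gets the frame fence and the presentation wait semaphore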
459 for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
460 --cmdBufferIdx) {
461 if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
462 (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
463 VK_NULL_HANDLE)) {
464 finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
465 break;
466 }
467 }
468
469 for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
470 const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
471 if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
472 continue;
473 }
474
475 const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
476
477 uint32_t waitSemaphoreCount = 0u;
478 constexpr const uint32_t maxWaitSemaphoreCount =
479 PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
480 VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
481 VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
482 for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
483 const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
484 PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());
485
486 VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
487 if (waitSemaphore != VK_NULL_HANDLE) {
488 waitSemaphores[waitSemaphoreCount] = waitSemaphore;
489 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
490 waitSemaphoreCount++;
491 }
492 }
493
494 if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
495 (!presentationData_.infos.empty())) {
496 swapchainSemaphoreWaited = true;
497 // go through all swapchain semaphores
498 for (const auto& presRef : presentationData_.infos) {
499 if (presRef.swapchainSemaphore) {
500 waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
501 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
502 waitSemaphoreCount++;
503 }
504 }
505 }
506
507 uint32_t signalSemaphoreCount = 0u;
508 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
509 constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
510 VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
511 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
512 VkFence fence = VK_NULL_HANDLE;
513 if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
514 // add fence signaling to last submission for frame sync
515 if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
516 fence = frameSync->GetFrameFence().fence;
517 frameSync->FrameFenceIsSignalled();
518 }
519 // signal external semaphores
520 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
521 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
522 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
523 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
524 if (externalSignals.size() == externalSemaphores.size()) {
525 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
526 // needs to be false
527 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
528 if (const auto* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get(); gs) {
529 semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
530 externalSignals[sigIdx].signaled = true;
531 }
532 }
533 }
534 }
535 }
536
537 if (presentationData_.present) {
538 commandBufferSubmitter_.presentationWaitSemaphore =
539 commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
540 semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
541 }
542 // add additional semaphores
543 for (const auto& swapRef : backBufferConfig.swapchainData) {
544 // should have been checked in render graph already
545 if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
546 semaphores[signalSemaphoreCount++] =
547 VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
548 }
549 }
550 } else if (renderContextRef.submitDepencies.signalSemaphore) {
551 semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
552 }
553 PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);
554
555 const VkSubmitInfo submitInfo {
556 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
557 nullptr, // pNext
558 waitSemaphoreCount, // waitSemaphoreCount
559 (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
560 waitSemaphorePipelineStageFlags, // pWaitDstStageMask
561 1, // commandBufferCount
562 &cmdSubmitterRef.commandBuffer, // pCommandBuffers
563 signalSemaphoreCount, // signalSemaphoreCount
564 (signalSemaphoreCount == 0) ? nullptr : semaphores, // pSignalSemaphores
565 };
566
567 const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
568 if (queue) {
569 RENDER_CPU_PERF_SCOPE("vkQueueSubmit", "");
570 VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
571 1, // submitCount
572 &submitInfo, // pSubmits
573 fence)); // fence
574 }
575 }
576 }
577
578 void RenderBackendVk::RenderProcessCommandLists(
579 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
580 {
581 // queue checked in upper level
582
583 const auto cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
584 constexpr uint64_t acquireTaskId { 0xFFFFffff0 };
585 constexpr uint64_t globalDescSetTaskId { 0xFFFFffff1 };
586 bool acquireSubmitted { false };
587 bool globalDescSetSubmitted { false };
588 vector<uint64_t> afterIdentifiers;
589 afterIdentifiers.reserve(2U); // global descriptor sets, and swapchain acquire wait
590 // submit global descset task if needed
591 {
592 auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
593 const auto& allDescSets = descriptorSetMgr.GetUpdateDescriptorSetHandles();
594 if (!allDescSets.empty()) {
595 globalDescSetSubmitted = true;
596 queue_->Submit(globalDescSetTaskId, FunctionTask::Create([this]() { UpdateGlobalDescriptorSets(); }));
597 }
598 }
599 // submit acquire task if needed
600 if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
601 acquireSubmitted = true;
602 queue_->Submit(acquireTaskId, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
603 AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
604 }));
605 }
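// task ids for secondary command list recording start after the primary command buffer indices
// so they never collide with the ids used for the primary submissions below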
606 uint64_t secondaryIdx = cmdBufferCount;
607 for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
608 afterIdentifiers.clear();
609 // add wait for acquire if needed
610 if (acquireSubmitted && (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx)) {
611 afterIdentifiers.push_back(acquireTaskId);
612 }
613 // NOTE: idx increase
614 const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
615 const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
616 PLUGIN_ASSERT(mrpData.subpassCount > 0);
617 const uint32_t rcCount = mrpData.subpassCount;
618 // add wait for global descriptor sets if needed
619 // add safety wait for secondary command lists always (NOTE: needs to be further optimized)
620 if (globalDescSetSubmitted &&
621 (mrpData.secondaryCmdLists || ref.renderCommandList->HasGlobalDescriptorSetBindings())) {
622 afterIdentifiers.push_back(globalDescSetTaskId);
623 }
624 if (mrpData.secondaryCmdLists) {
625 afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
626 for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
627 const uint64_t submitId = secondaryIdx++;
628 afterIdentifiers.push_back(submitId);
629 PLUGIN_ASSERT((cmdBufferIdx + secondIdx) < cmdBufferCount);
630 queue_->SubmitAfter(afterIdentifiers, submitId,
631 FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
632 const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
633 MultiRenderCommandListDesc mrcDesc;
634 mrcDesc.multiRenderCommandListCount = 1u;
635 mrcDesc.baseContext = nullptr;
636 mrcDesc.secondaryCommandBuffer = true;
637 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
638 const DebugNames debugNames { ref2.debugName,
639 renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
640 RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
641 }));
642 }
643 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
644 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
645 MultiRenderCommandListDesc mrcDesc;
646 mrcDesc.multiRenderCommandListCount = rcCount;
647 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
648 const DebugNames debugNames { ref2.debugName,
649 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
650 RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
651 }));
652 } else {
653 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
654 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
655 MultiRenderCommandListDesc mrcDesc;
656 mrcDesc.multiRenderCommandListCount = rcCount;
657 if (rcCount > 1) {
658 mrcDesc.multiRenderNodeCmdList = true;
659 mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
660 }
661 for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
662 const uint32_t currIdx = cmdBufferIdx + rcIdx;
663 mrcDesc.multiRenderCommandListIndex = rcIdx;
664 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
665 const DebugNames debugNames { ref2.debugName,
666 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
667 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
668 }
669 }));
670 }
671 // idx increase
672 cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
673 }
674
675 // execute and wait for completion.
676 queue_->Execute();
677 queue_->Clear();
678 }
679
680 void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
681 RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
682 const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
683 {
684 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
685 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
686 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
687
688 const ContextCommandPoolVk& ptrCmdPool =
689 (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
690 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;
691
692 // begin cmd buffer
693 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
694 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
695 const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
696 if (valid) {
697 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
698 ptrCmdPool.commandPool, // commandPool
699 commandPoolResetFlags)); // flags
700 }
701
702 constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
703 VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
704 };
705 const VkCommandBufferBeginInfo commandBufferBeginInfo {
706 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
707 nullptr, // pNext
708 commandBufferUsageFlags, // flags
709 nullptr, // pInheritanceInfo
710 };
711 if (valid) {
712 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
713 &commandBufferBeginInfo)); // pBeginInfo
714 }
715
716 StateCache stateCache;
717
718 const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
719 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
720 const auto commandCount = static_cast<uint32_t>(rcRef.size());
721 const RenderCommandBeginRenderPass* rcBeginRenderPass =
722 (mrpcld.rpBeginCmdIndex < commandCount)
723 ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
724 : nullptr;
725 const RenderCommandEndRenderPass* rcEndRenderPass =
726 (mrpcld.rpEndCmdIndex < commandCount)
727 ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
728 : nullptr;
729
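// the primary command buffer only records the barrier point, render pass begin / next subpass commands,
// and the execution of the secondary command buffers recorded by the render nodes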
730 if (rcBeginRenderPass && rcEndRenderPass) {
731 if (mrpcld.rpBarrierCmdIndex < commandCount) {
732 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
733 PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
734 const RenderCommandBarrierPoint& barrierPoint =
735 *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
736 // handle all barriers before render command that needs resource syncing
737 RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
738 }
739
740 // begin render pass
741 stateCache.primaryRenderPass = true;
742 RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
743 stateCache.primaryRenderPass = false;
744
745 // get secondary command buffers from correct indices and execute
746 for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
747 const uint32_t currCmdBufIdx = cmdBufIdx + idx;
748 PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
749 const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
750 NodeContextPoolManagerVk& contextPoolVk =
751 *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);
752
753 const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
754 const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
755 const auto mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
756 // next subpass is only called from the second render pass onwards
757 if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
758 RenderCommandBeginRenderPass renderPass =
759 *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
760 renderPass.renderPassDesc.subpassContents =
761 SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
762 stateCache.renderCommandBeginRenderPass = nullptr; // reset
763 RenderCommand(
764 renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
765 }
766 RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
767 }
768
769 // end render pass (replace the primary render pass)
770 stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
771 // NOTE: render graph has batched the subpasses to have END_SUBPASS, we need END_RENDER_PASS
772 constexpr RenderCommandEndRenderPass rcerp = {};
773 RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
774 }
775
776 // end cmd buffer
777 if (valid) {
778 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
779 }
780
781 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
782 }
783
784 void RenderBackendVk::RenderExecuteSecondaryCommandLists(
785 const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
786 {
787 if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
788 vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
789 1u, // commandBufferCount
790 &executeCmdBuffer.commandBuffer); // pCommandBuffers
791 }
792 }
793
794 VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
795 const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
796 {
797 auto& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);
798
799 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
800 const auto cmdCount = static_cast<uint32_t>(rcRef.size());
801
802 const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
803 PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
804 PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
805 if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
806 const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
807 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
808 const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
809 LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
810
811 const uint32_t subpass = renderCmd.subpassStartIndex;
812 return VkCommandBufferInheritanceInfo {
813 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
814 nullptr, // pNext
815 lowLevelRenderPassData.renderPass, // renderPass
816 subpass, // subpass
817 VK_NULL_HANDLE, // framebuffer
818 VK_FALSE, // occlusionQueryEnable
819 0, // queryFlags
820 0, // pipelineStatistics
821 };
822 } else {
823 return VkCommandBufferInheritanceInfo {};
824 }
825 }
826
827 void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
828 const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
829 {
830 // these are validated in render graph
831 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
832 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
833 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
834 NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
835 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
836
837 #if (RENDER_PERF_ENABLED == 1)
838 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
839 const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
840 const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
841 #endif
842 PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
843 PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
844 if (perfDataSet) {
845 perfDataSet->cpuTimer.Begin();
846 }
847
848 RENDER_CPU_PERF_SCOPE("RenderSingleCommandList", debugNames.renderCommandBufferName);
849 #endif
850
851 contextPoolMgr.BeginBackendFrame();
852 ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
853 nodeContextPsoMgr.BeginBackendFrame();
854
855 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
856
857 StateCache stateCache = {}; // state cache for this render command list
858 stateCache.backendNode = renderCommandCtx.renderBackendNode;
859 stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;
860
861 // the command buffer has been waited on with a single frame fence
862 const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
863 const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
864 const bool endCommandBuffer =
865 (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
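// in the multi render command list (merged render pass) case the shared primary command buffer
// is begun by the first list and ended by the last one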
866 const ContextCommandPoolVk* ptrCmdPool = nullptr;
867 if (mrclDesc.multiRenderNodeCmdList) {
868 PLUGIN_ASSERT(mrclDesc.baseContext);
869 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
870 ->GetContextCommandPool();
871 } else if (mrclDesc.secondaryCommandBuffer) {
872 PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
873 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
874 } else {
875 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
876 }
877
878 // update cmd list context descriptor sets
879 UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);
880
881 PLUGIN_ASSERT(ptrCmdPool);
882 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;
883
884 if (beginCommandBuffer) {
885 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
886 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
887 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
888 ptrCmdPool->commandPool, // commandPool
889 commandPoolResetFlags)); // flags
890
891 VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
892 VkCommandBufferInheritanceInfo inheritanceInfo {};
893 if (stateCache.secondaryCommandBuffer) {
894 commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
895 inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
896 }
897 const VkCommandBufferBeginInfo commandBufferBeginInfo {
898 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
899 nullptr, // pNext
900 commandBufferUsageFlags, // flags
901 mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
902 };
903
904 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
905 &commandBufferBeginInfo)); // pBeginInfo
906
907 #if (RENDER_PERF_ENABLED == 1)
908 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
909 if (validGpuQueries) {
910 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
911 PLUGIN_ASSERT(gpuQuery);
912
913 gpuQuery->NextQueryIndex();
914
915 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
916 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
917 }
918 #endif
919 #endif
920 }
921
922 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
923 {
924 BeginDebugMarker(cmdBuffer, debugNames.renderCommandListName, { 1.f, 1.f, 1.f, 1.f });
925 }
926 #endif
927
928 for (const auto& ref : rcRef) {
929 if (!stateCache.validCommandList) {
930 #if (RENDER_VALIDATION_ENABLED == 1)
931 PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
932 "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
933 debugNames.renderCommandListName.data());
934 #endif
935 break;
936 }
937
938 PLUGIN_ASSERT(ref.rc);
939 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
940 {
941 const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
942 BeginDebugMarker(cmdBuffer, COMMAND_NAMES[index], { 0.87f, 0.83f, 0.29f, 1.f });
943 }
944 #endif
945
946 switch (ref.type) {
947 case RenderCommandType::BARRIER_POINT: {
948 if (!stateCache.secondaryCommandBuffer) {
949 const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
950 // handle all barriers before render command that needs resource syncing
951 RenderCommand(
952 barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
953 }
954 break;
955 }
956 case RenderCommandType::DRAW: {
957 RenderCommand(
958 *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
959 break;
960 }
961 case RenderCommandType::DRAW_INDIRECT: {
962 RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
963 contextPoolMgr, stateCache);
964 break;
965 }
966 case RenderCommandType::DISPATCH: {
967 RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
968 contextPoolMgr, stateCache);
969 break;
970 }
971 case RenderCommandType::DISPATCH_INDIRECT: {
972 RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
973 contextPoolMgr, stateCache);
974 break;
975 }
976 case RenderCommandType::BIND_PIPELINE: {
977 RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
978 contextPoolMgr, stateCache);
979 break;
980 }
981 case RenderCommandType::BEGIN_RENDER_PASS: {
982 RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
983 contextPoolMgr, stateCache);
984 break;
985 }
986 case RenderCommandType::NEXT_SUBPASS: {
987 RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
988 contextPoolMgr, stateCache);
989 break;
990 }
991 case RenderCommandType::END_RENDER_PASS: {
992 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
993 contextPoolMgr, stateCache);
994 break;
995 }
996 case RenderCommandType::BIND_VERTEX_BUFFERS: {
997 RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
998 contextPoolMgr, stateCache);
999 break;
1000 }
1001 case RenderCommandType::BIND_INDEX_BUFFER: {
1002 RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1003 contextPoolMgr, stateCache);
1004 break;
1005 }
1006 case RenderCommandType::COPY_BUFFER: {
1007 RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1008 contextPoolMgr, stateCache);
1009 break;
1010 }
1011 case RenderCommandType::COPY_BUFFER_IMAGE: {
1012 RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1013 contextPoolMgr, stateCache);
1014 break;
1015 }
1016 case RenderCommandType::COPY_IMAGE: {
1017 RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1018 contextPoolMgr, stateCache);
1019 break;
1020 }
1021 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
1022 RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1023 contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
1024 break;
1025 }
1026 case RenderCommandType::PUSH_CONSTANT: {
1027 RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1028 contextPoolMgr, stateCache);
1029 break;
1030 }
1031 case RenderCommandType::BLIT_IMAGE: {
1032 RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1033 contextPoolMgr, stateCache);
1034 break;
1035 }
1036 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
1037 RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
1038 nodeContextPsoMgr, contextPoolMgr, stateCache);
1039 break;
1040 }
1041 case RenderCommandType::CLEAR_COLOR_IMAGE: {
1042 RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1043 contextPoolMgr, stateCache);
1044 break;
1045 }
1046 // dynamic states
1047 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
1048 RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1049 contextPoolMgr, stateCache);
1050 break;
1051 }
1052 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
1053 RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1054 contextPoolMgr, stateCache);
1055 break;
1056 }
1057 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
1058 RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1059 contextPoolMgr, stateCache);
1060 break;
1061 }
1062 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
1063 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1064 contextPoolMgr, stateCache);
1065 break;
1066 }
1067 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
1068 RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
1069 nodeContextPsoMgr, contextPoolMgr, stateCache);
1070 break;
1071 }
1072 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
1073 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1074 contextPoolMgr, stateCache);
1075 break;
1076 }
1077 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
1078 RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1079 contextPoolMgr, stateCache);
1080 break;
1081 }
1082 case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1083 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1084 nodeContextPsoMgr, contextPoolMgr, stateCache);
1085 break;
1086 }
1087 case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1088 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1089 nodeContextPsoMgr, contextPoolMgr, stateCache);
1090 break;
1091 }
1092 //
1093 case RenderCommandType::WRITE_TIMESTAMP: {
1094 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1095 contextPoolMgr, stateCache);
1096 break;
1097 }
1098 case RenderCommandType::UNDEFINED:
1099 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1100 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1101 case RenderCommandType::BEGIN_DEBUG_MARKER:
1102 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1103 RenderCommand(*static_cast<RenderCommandBeginDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1104 contextPoolMgr, stateCache);
1105 #endif
1106 break;
1107 case RenderCommandType::END_DEBUG_MARKER:
1108 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1109 RenderCommand(*static_cast<RenderCommandEndDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1110 contextPoolMgr, stateCache);
1111 #endif
1112 break;
1113 default: {
1114 PLUGIN_ASSERT(false && "non-valid render command");
1115 break;
1116 }
1117 }
1118 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1119 {
1120 EndDebugMarker(cmdBuffer);
1121 }
1122 #endif
1123 }
1124
1125 if ((!presentationData_.infos.empty())) {
1126 RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1127 }
1128
1129 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1130 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1131 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1132 }
1133 #endif
1134
1135 #if (RENDER_PERF_ENABLED == 1)
1136 // copy counters
1137 if (perfDataSet) {
1138 CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1139 }
1140 #endif
1141
1142 if (endCommandBuffer) {
1143 #if (RENDER_PERF_ENABLED == 1)
1144 if (perfDataSet) {
1145 perfDataSet->cpuTimer.End();
1146 }
1147 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1148 if (validGpuQueries) {
1149 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1150 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1151 }
1152 #endif
1153 CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1154 #endif
1155
1156 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1157
1158 if (mrclDesc.secondaryCommandBuffer) {
1159 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1160 } else {
1161 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1162 }
1163 }
1164 }
1165
1166 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1167 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1168 {
1169 const RenderHandle psoHandle = renderCmd.psoHandle;
1170 const auto pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1171
1172 stateCache.psoHandle = psoHandle;
1173
1174 VkPipeline pipeline { VK_NULL_HANDLE };
1175 VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
1176 if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1177 const auto* pso = static_cast<const ComputePipelineStateObjectVk*>(
1178 psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1179 if (pso) {
1180 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1181 pipeline = plat.pipeline;
1182 pipelineLayout = plat.pipelineLayout;
1183 }
1184 } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1185 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1186 if (stateCache.renderCommandBeginRenderPass) {
1187 uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1188 if (stateCache.pipelineDescSetHash != 0) {
1189 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1190 }
1191 const auto* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1192 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1193 stateCache.renderCommandBeginRenderPass->subpasses,
1194 stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1195 &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1196 if (pso) {
1197 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1198 pipeline = plat.pipeline;
1199 pipelineLayout = plat.pipelineLayout;
1200 }
1201 }
1202 }
1203
1204 // NOTE: render front-end expects pso binding after begin render pass
1205 // in some situations the render pass might change and therefore the pipeline changes
1206 // in some situations the render pass is the same and the rebinding is not needed
1207 const bool newPipeline = (pipeline != stateCache.pipeline);
1208 const bool valid = (pipeline != VK_NULL_HANDLE);
1209 if (valid && newPipeline) {
1210 stateCache.pipeline = pipeline;
1211 stateCache.pipelineLayout = pipelineLayout;
1212 stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1213 vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1214 pipelineBindPoint, // pipelineBindPoint
1215 pipeline); // pipeline
1216 #if (RENDER_PERF_ENABLED == 1)
1217 stateCache.perfCounters.bindPipelineCount++;
1218 #endif
1219 }
1220 }
1221
1222 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1223 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1224 {
1225 if (stateCache.validBindings) {
1226 if (renderCmd.indexCount) {
1227 vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1228 renderCmd.indexCount, // indexCount
1229 renderCmd.instanceCount, // instanceCount
1230 renderCmd.firstIndex, // firstIndex
1231 renderCmd.vertexOffset, // vertexOffset
1232 renderCmd.firstInstance); // firstInstance
1233 #if (RENDER_PERF_ENABLED == 1)
1234 stateCache.perfCounters.drawCount++;
1235 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1236 stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1237 #endif
1238 } else {
1239 vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1240 renderCmd.vertexCount, // vertexCount
1241 renderCmd.instanceCount, // instanceCount
1242 renderCmd.firstVertex, // firstVertex
1243 renderCmd.firstInstance); // firstInstance
1244 #if (RENDER_PERF_ENABLED == 1)
1245 stateCache.perfCounters.drawCount++;
1246 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1247 stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
1248 * renderCmd.instanceCount;
1249 #endif
1250 }
1251 }
1252 }
1253
1254 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1255 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1256 {
1257 if (stateCache.validBindings) {
1258 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1259 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1260 const VkBuffer buffer = plat.buffer;
1261 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1262 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1263 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1264 buffer, // buffer
1265 offset, // offset
1266 renderCmd.drawCount, // drawCount
1267 renderCmd.stride); // stride
1268 } else {
1269 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1270 buffer, // buffer
1271 (VkDeviceSize)renderCmd.offset, // offset
1272 renderCmd.drawCount, // drawCount
1273 renderCmd.stride); // stride
1274 }
1275 #if (RENDER_PERF_ENABLED == 1)
1276 stateCache.perfCounters.drawIndirectCount++;
1277 #endif
1278 }
1279 }
1280 }
1281
1282 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1283 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1284 {
1285 if (stateCache.validBindings) {
1286 vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1287 renderCmd.groupCountX, // groupCountX
1288 renderCmd.groupCountY, // groupCountY
1289 renderCmd.groupCountZ); // groupCountZ
1290 #if (RENDER_PERF_ENABLED == 1)
1291 stateCache.perfCounters.dispatchCount++;
1292 #endif
1293 }
1294 }
1295
1296 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1297 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1298 const StateCache& stateCache)
1299 {
1300 if (stateCache.validBindings) {
1301 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1302 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1303 const VkBuffer buffer = plat.buffer;
1304 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1305 vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1306 buffer, // buffer
1307 offset); // offset
1308 #if (RENDER_PERF_ENABLED == 1)
1309 stateCache.perfCounters.dispatchIndirectCount++;
1310 #endif
1311 }
1312 }
1313 }
1314
1315 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1316 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1317 StateCache& stateCache)
1318 {
1319 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1320 stateCache.renderCommandBeginRenderPass = &renderCmd;
1321
1322 auto& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1323 // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
1324 stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1325
1326 // early out for multi render command list render pass
1327 if (stateCache.secondaryCommandBuffer) {
1328 return; // early out
1329 }
1330 const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1331 (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1332 // invalidate the whole command list
1333 if (!validRpFbo) {
1334 stateCache.validCommandList = false;
1335 return; // early out
1336 }
1337
1338 if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
1339 if (renderCmd.subpassStartIndex < renderCmd.subpasses.size()) {
1340 if ((renderCmd.subpasses[renderCmd.subpassStartIndex].subpassFlags &
1341 SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
1342 const auto subpassContents = static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1343 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1344 subpassContents); // contents
1345 }
1346 }
1347 return; // early out
1348 }
1349
1350 const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1351
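// collect clear values only for attachments whose loadOp or stencilLoadOp is CLEAR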
1352 VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1353 bool hasClearValues = false;
1354 for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1355 const auto& ref = renderPassDesc.attachments[idx];
1356 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1357 ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1358 const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1359 VkClearValue cVal;
1360 if (RenderHandleUtil::IsDepthImage(handle)) {
1361 PLUGIN_STATIC_ASSERT(sizeof(cVal.depthStencil) == sizeof(ref.clearValue.depthStencil));
1362 cVal.depthStencil.depth = ref.clearValue.depthStencil.depth;
1363 cVal.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1364 } else {
1365 PLUGIN_STATIC_ASSERT(sizeof(cVal.color) == sizeof(ref.clearValue.color));
1366 CloneData(&cVal.color, sizeof(cVal.color), &ref.clearValue.color, sizeof(ref.clearValue.color));
1367 }
1368 clearValues[idx] = cVal;
1369 hasClearValues = true;
1370 }
1371 }
1372
1373 // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1374 // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1375 const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
1376
1377 VkRect2D renderArea {
1378 { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1379 { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1380 };
1381 // render area needs to be inside frame buffer
1382 const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1383 renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1384 renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1385 renderArea.extent.width = Math::min(renderArea.extent.width,
1386 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1387 renderArea.extent.height = Math::min(renderArea.extent.height,
1388 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1389
1390 const VkRenderPassBeginInfo renderPassBeginInfo {
1391 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // sType
1392 nullptr, // pNext
1393 stateCache.lowLevelRenderPassData.renderPass, // renderPass
1394 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1395 renderArea, // renderArea
1396 clearValueCount, // clearValueCount
1397 clearValues, // pClearValues
1398 };
1399
1400 // NOTE: could be patched in render graph
1401 // const VkSubpassContents subpassContents = (VkSubpassContents)renderPassDesc.subpassContents;
1402 const VkSubpassContents subpassContents =
1403 stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
1404 vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1405 &renderPassBeginInfo, // pRenderPassBegin
1406 subpassContents); // contents
1407 #if (RENDER_PERF_ENABLED == 1)
1408 stateCache.perfCounters.renderPassCount++;
1409 #endif
1410 }
1411
1412 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1413 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1414 {
1415 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1416
1417 const auto subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1418 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1419 subpassContents); // contents
1420 }
1421
1422 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1423 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1424 {
1425 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1426
1427 // early out for multi render command list render pass
1428 if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
1429 return; // NOTE
1430 }
1431
1432 stateCache.renderCommandBeginRenderPass = nullptr;
1433 stateCache.lowLevelRenderPassData = {};
1434
1435 if (!stateCache.secondaryCommandBuffer) {
1436 vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1437 }
1438 }
1439
1440 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1441 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1442 const StateCache& stateCache)
1443 {
1444 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1445 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1446
1447 const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1448
1449 VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1450 VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1451 const GpuBufferVk* gpuBuffer = nullptr;
1452 RenderHandle currBufferHandle;
1453 for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1454 const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
1455 // our importer usually uses the same GPU buffer for all vertex buffers in a single primitive
1456 // do not re-fetch the buffer if not needed
1457 if (currBufferHandle.id != currVb.bufferHandle.id) {
1458 currBufferHandle = currVb.bufferHandle;
1459 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1460 }
1461 if (gpuBuffer) {
1462 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1463 const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1464 vertexBuffers[idx] = plat.buffer;
1465 offsets[idx] = offset;
1466 }
1467 }
1468
1469 vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1470 0, // firstBinding
1471 vertexBufferCount, // bindingCount
1472 vertexBuffers, // pBuffers
1473 offsets); // pOffsets
1474 }
1475
1476 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1477 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1478 const StateCache& stateCache)
1479 {
1480 const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1481
1482 PLUGIN_ASSERT(gpuBuffer);
1483 if (gpuBuffer) {
1484 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1485 const VkBuffer buffer = plat.buffer;
1486 const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1487 const auto indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1488
1489 vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1490 buffer, // buffer
1491 offset, // offset
1492 indexType); // indexType
1493 }
1494 }
1495
1496 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1497 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1498 {
1499 const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1500 const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1501 if (srcImagePtr && dstImagePtr) {
1502 const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
1503 const auto& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();
1504
1505 const ImageBlit& ib = renderCmd.imageBlit;
1506 const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1507 ? srcPlatImage.arrayLayers
1508 : ib.srcSubresource.layerCount;
1509 const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1510 ? dstPlatImage.arrayLayers
1511 : ib.dstSubresource.layerCount;
1512
1513 const VkImageSubresourceLayers srcSubresourceLayers {
1514 (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1515 ib.srcSubresource.mipLevel, // mipLevel
1516 ib.srcSubresource.baseArrayLayer, // baseArrayLayer
1517 srcLayerCount, // layerCount
1518 };
1519 const VkImageSubresourceLayers dstSubresourceLayers {
1520 (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1521 ib.dstSubresource.mipLevel, // mipLevel
1522 ib.dstSubresource.baseArrayLayer, // baseArrayLayer
1523 dstLayerCount, // layerCount
1524 };
1525
1526 const VkImageBlit imageBlit {
1527 srcSubresourceLayers, // srcSubresource
1528 { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1529 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1530 (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1531 dstSubresourceLayers, // dstSubresource
1532 { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1533 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1534 (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1535 };
1536
1537 vkCmdBlitImage(cmdBuf.commandBuffer, // commandBuffer
1538 srcPlatImage.image, // srcImage
1539 (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout,
1540 dstPlatImage.image, // dstImage
1541 (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1542 1, // regionCount
1543 &imageBlit, // pRegions
1544 (VkFilter)renderCmd.filter); // filter
1545 }
1546 }
1547
1548 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1549 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1550 {
1551 const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1552 const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1553
1554 PLUGIN_ASSERT(srcGpuBuffer);
1555 PLUGIN_ASSERT(dstGpuBuffer);
1556
1557 if (srcGpuBuffer && dstGpuBuffer) {
1558 const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1559 const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1560 const VkBufferCopy bufferCopy {
1561 renderCmd.bufferCopy.srcOffset,
1562 renderCmd.bufferCopy.dstOffset,
1563 renderCmd.bufferCopy.size,
1564 };
1565
1566 if (bufferCopy.size > 0) {
1567 vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1568 srcBuffer, // srcBuffer
1569 dstBuffer, // dstBuffer
1570 1, // regionCount
1571 &bufferCopy); // pRegions
1572 }
1573 }
1574 }
1575
1576 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1577 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1578 const StateCache& stateCache)
1579 {
1580 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1581 PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1582 return;
1583 }
1584
1585 const GpuBufferVk* gpuBuffer = nullptr;
1586 const GpuImageVk* gpuImage = nullptr;
1587 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1588 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1589 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1590 } else {
1591 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1592 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1593 }
1594
1595 if (gpuBuffer && gpuImage) {
1596 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1597 const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1598 const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1599 const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1600 ? platImage.arrayLayers
1601 : subresourceLayer.layerCount;
1602 const VkImageSubresourceLayers imageSubresourceLayer {
1603 (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1604 subresourceLayer.mipLevel,
1605 subresourceLayer.baseArrayLayer,
1606 layerCount,
1607 };
1608 const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1609 // Math::min to force staying inside image
1610 const uint32_t mip = subresourceLayer.mipLevel;
1611 const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
1612 const Size3D& imageOffset = bufferImageCopy.imageOffset;
1613 const VkExtent3D imageExtent = {
1614 Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1615 Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1616 Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1617 };
1618 const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1619 (imageOffset.depth < imageSize.depth);
1620 const VkBufferImageCopy bufferImageCopyVk {
1621 bufferImageCopy.bufferOffset,
1622 bufferImageCopy.bufferRowLength,
1623 bufferImageCopy.bufferImageHeight,
1624 imageSubresourceLayer,
1625 { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1626 static_cast<int32_t>(imageOffset.depth) },
1627 imageExtent,
1628 };
1629
1630 const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1631 const VkImage image = (gpuImage->GetPlatformData()).image;
1632
1633 if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1634 vkCmdCopyBufferToImage(cmdBuf.commandBuffer, // commandBuffer
1635 buffer, // srcBuffer
1636 image, // dstImage
1637 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1638 1, // regionCount
1639 &bufferImageCopyVk); // pRegions
1640 } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1641 vkCmdCopyImageToBuffer(cmdBuf.commandBuffer, // commandBuffer
1642 image, // srcImage
1643 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1644 buffer, // dstBuffer
1645 1, // regionCount
1646 &bufferImageCopyVk); // pRegions
1647 }
1648 }
1649 }
1650
1651 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1652 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1653 {
1654 const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1655 const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1656 if (srcGpuImage && dstGpuImage) {
1657 const ImageCopy& copy = renderCmd.imageCopy;
1658 const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1659 const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1660
1661 const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1662 const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1663 const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1664 ? srcPlatImage.arrayLayers
1665 : srcSubresourceLayer.layerCount;
1666 const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1667 ? dstPlatImage.arrayLayers
1668 : dstSubresourceLayer.layerCount;
1669
1670 const VkImageSubresourceLayers srcImageSubresourceLayer {
1671 (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1672 srcSubresourceLayer.mipLevel,
1673 srcSubresourceLayer.baseArrayLayer,
1674 srcLayerCount,
1675 };
1676 const VkImageSubresourceLayers dstImageSubresourceLayer {
1677 (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1678 dstSubresourceLayer.mipLevel,
1679 dstSubresourceLayer.baseArrayLayer,
1680 dstLayerCount,
1681 };
1682
1683 const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1684 const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1685
1686 VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1687 ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1688 ext.height =
1689 Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1690 ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1691
1692 const VkImageCopy imageCopyVk {
1693 srcImageSubresourceLayer, // srcSubresource
1694 { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1695 dstImageSubresourceLayer, // dstSubresource
1696 { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1697 ext, // extent
1698 };
1699 vkCmdCopyImage(cmdBuf.commandBuffer, // commandBuffer
1700 srcPlatImage.image, // srcImage
1701 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1702 dstPlatImage.image, // dstImage
1703 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1704 1, // regionCount
1705 &imageCopyVk); // pRegions
1706 }
1707 }
1708
1709 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1710 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1711 const RenderBarrierList& rbl)
1712 {
1713 if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1714 return;
1715 }
1716
1717 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1718 rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1719 PLUGIN_ASSERT(barrierPointBarriers);
1720 if (!barrierPointBarriers) {
1721 return;
1722 }
1723 constexpr uint32_t maxBarrierCount { 8 };
1724 VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1725 VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1726 VkMemoryBarrier memoryBarriers[maxBarrierCount];
1727
1728 // generally there is only a single barrier list per barrier point
1729 // in situations with batched render passes there can be many
1730 // NOTE: all barrier lists could be patched to single vk command if needed
1731 // NOTE: Memory and pipeline barriers should be allowed in the front-end side
1732 const auto barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
1733 const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1734 #if (RENDER_VALIDATION_ENABLED == 1)
1735 uint32_t fullBarrierCount = 0u;
1736 #endif
1737 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1738 if (nextBarrierList == nullptr) { // cannot be null, just a safety
1739 PLUGIN_ASSERT(false);
1740 return;
1741 }
1742 const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1743 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1744 const auto barrierCount = (uint32_t)barrierListRef.count;
1745
1746 uint32_t bufferBarrierIdx = 0;
1747 uint32_t imageBarrierIdx = 0;
1748 uint32_t memoryBarrierIdx = 0;
1749
1750 VkPipelineStageFlags srcPipelineStageMask { 0 };
1751 VkPipelineStageFlags dstPipelineStageMask { 0 };
1752 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1753
1754 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1755 const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1756
1757 uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1758 uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1759 if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1760 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1761 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1762 }
1763
1764 const RenderHandle resourceHandle = ref.resourceHandle;
1765 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1766
1767 PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1768 (handleType == RenderHandleType::GPU_IMAGE));
1769
1770 const auto srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1771 const auto dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1772
1773 srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1774 dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1775
1776 // NOTE: zero size buffer barriers allowed ATM
1777 if (handleType == RenderHandleType::GPU_BUFFER) {
1778 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1779 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1780 // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1781 const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1782 const VkDeviceSize size =
1783 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1784 (VkDeviceSize)ref.dst.optionalByteSize);
1785 if (platBuffer.buffer) {
1786 bufferMemoryBarriers[bufferBarrierIdx++] = {
1787 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1788 nullptr, // pNext
1789 srcAccessMask, // srcAccessMask
1790 dstAccessMask, // dstAccessMask
1791 srcQueueFamilyIndex, // srcQueueFamilyIndex
1792 dstQueueFamilyIndex, // dstQueueFamilyIndex
1793 platBuffer.buffer, // buffer
1794 offset, // offset
1795 size, // size
1796 };
1797 }
1798 }
1799 } else if (handleType == RenderHandleType::GPU_IMAGE) {
1800 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1801 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1802
1803 const auto srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1804 const auto dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1805
1806 const VkImageAspectFlags imageAspectFlags =
1807 (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1808 ? platImage.aspectFlags
1809 : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1810
1811 const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1812 PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1813 ? VK_REMAINING_MIP_LEVELS
1814 : ref.src.optionalImageSubresourceRange.levelCount;
1815
1816 const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1817 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1818 ? VK_REMAINING_ARRAY_LAYERS
1819 : ref.src.optionalImageSubresourceRange.layerCount;
1820
1821 const VkImageSubresourceRange imageSubresourceRange {
1822 imageAspectFlags, // aspectMask
1823 ref.src.optionalImageSubresourceRange.baseMipLevel, // baseMipLevel
1824 levelCount, // levelCount
1825 ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1826 layerCount, // layerCount
1827 };
1828
1829 if (platImage.image) {
1830 imageMemoryBarriers[imageBarrierIdx++] = {
1831 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1832 nullptr, // pNext
1833 srcAccessMask, // srcAccessMask
1834 dstAccessMask, // dstAccessMask
1835 srcImageLayout, // oldLayout
1836 dstImageLayout, // newLayout
1837 srcQueueFamilyIndex, // srcQueueFamilyIndex
1838 dstQueueFamilyIndex, // dstQueueFamilyIndex
1839 platImage.image, // image
1840 imageSubresourceRange, // subresourceRange
1841 };
1842 }
1843 }
1844 } else {
1845 memoryBarriers[memoryBarrierIdx++] = {
1846 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1847 nullptr, // pNext
1848 srcAccessMask, // srcAccessMask
1849 dstAccessMask, // dstAccessMask
1850 };
1851 }
1852
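// flush the batched barriers when any local array is full or this was the last barrier in the list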
1853 const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1854 const bool resetBarriers = ((bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1855 (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1)));
1856
1857 if (hasBarriers && resetBarriers) {
1858 #if (RENDER_VALIDATION_ENABLED == 1)
1859 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1860 #endif
1861 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1862 srcPipelineStageMask, // srcStageMask
1863 dstPipelineStageMask, // dstStageMask
1864 dependencyFlags, // dependencyFlags
1865 memoryBarrierIdx, // memoryBarrierCount
1866 memoryBarriers, // pMemoryBarriers
1867 bufferBarrierIdx, // bufferMemoryBarrierCount
1868 bufferMemoryBarriers, // pBufferMemoryBarriers
1869 imageBarrierIdx, // imageMemoryBarrierCount
1870 imageMemoryBarriers); // pImageMemoryBarriers
1871
1872 bufferBarrierIdx = 0;
1873 imageBarrierIdx = 0;
1874 memoryBarrierIdx = 0;
1875 }
1876 }
1877 }
1878 #if (RENDER_VALIDATION_ENABLED == 1)
1879 if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1880 PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1881 "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1882 barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1883 }
1884 #endif
1885 }
1886
1887 namespace {
1888 struct DescriptorSetUpdateDataStruct {
1889 uint32_t accelIndex { 0U };
1890 uint32_t bufferIndex { 0U };
1891 uint32_t imageIndex { 0U };
1892 uint32_t samplerIndex { 0U };
1893 uint32_t writeBindIdx { 0U };
1894 };
1895
1896 void UpdateSingleDescriptorSet(const GpuResourceManager& gpuResourceMgr, RenderBackendVk::StateCache* stateCache,
1897 const LowLevelDescriptorSetVk* descriptorSet, const DescriptorSetLayoutBindingResourcesHandler& bindingResources,
1898 LowLevelContextDescriptorWriteDataVk& wd, DescriptorSetUpdateDataStruct& dsud)
1899 {
1900 // actual vulkan descriptor set update
1901 if (descriptorSet && descriptorSet->descriptorSet) {
1902 if ((uint32_t)bindingResources.bindings.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1903 PLUGIN_ASSERT(false);
1904 return;
1905 }
1906 const auto& buffers = bindingResources.buffers;
1907 const auto& images = bindingResources.images;
1908 const auto& samplers = bindingResources.samplers;
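// write buffer descriptors first; acceleration structure bindings are passed via the pNext chain of the write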
1909 for (const auto& refBuf : buffers) {
1910 const auto& ref = refBuf.desc;
1911 const uint32_t descriptorCount = ref.binding.descriptorCount;
1912 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
1913 if (descriptorCount == 0) {
1914 continue;
1915 }
1916 const uint32_t arrayOffset = ref.arrayOffset;
1917 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1918 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1919 #if (RENDER_VULKAN_RT_ENABLED == 1)
1920 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1921 // first is the ref, starting from 1 we use array offsets
1922 const BindableBuffer& bRes =
1923 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
1924 if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1925 const GpuAccelerationStructurePlatformDataVk& platAccel =
1926 resPtr->GetPlatformDataAccelerationStructure();
1927 wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1928 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1929 nullptr, // pNext
1930 descriptorCount, // accelerationStructureCount
1931 &platAccel.accelerationStructure, // pAccelerationStructures
1932 };
1933 }
1934 }
1935 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1936 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1937 &wd.descriptorAccelInfos[dsud.accelIndex], // pNext
1938 descriptorSet->descriptorSet, // dstSet
1939 ref.binding.binding, // dstBinding
1940 0, // dstArrayElement
1941 descriptorCount, // descriptorCount
1942 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1943 nullptr, // pImageInfo
1944 nullptr, // pBufferInfo
1945 nullptr, // pTexelBufferView
1946 };
1947 dsud.accelIndex += descriptorCount;
1948 #endif
1949 } else {
1950 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1951 // first is the ref, starting from 1 we use array offsets
1952 const BindableBuffer& bRes =
1953 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
1954 const auto optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1955 if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1956 const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
1957 // takes into account dynamic ring buffers with mapping
1958 const auto bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1959 const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1960 const VkDeviceSize bufferRange =
1961 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1962 (VkDeviceSize)bRes.byteSize);
1963 wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
1964 platBuffer.buffer, // buffer
1965 byteOffset, // offset
1966 bufferRange, // range
1967 };
1968 }
1969 }
1970 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1971 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1972 nullptr, // pNext
1973 descriptorSet->descriptorSet, // dstSet
1974 ref.binding.binding, // dstBinding
1975 0, // dstArrayElement
1976 descriptorCount, // descriptorCount
1977 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1978 nullptr, // pImageInfo
1979 &wd.descriptorBufferInfos[dsud.bufferIndex], // pBufferInfo
1980 nullptr, // pTexelBufferView
1981 };
1982 dsud.bufferIndex += descriptorCount;
1983 }
1984 }
1985 for (const auto& refImg : images) {
1986 const auto& ref = refImg.desc;
1987 const uint32_t descriptorCount = ref.binding.descriptorCount;
1988 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
1989 if (descriptorCount == 0) {
1990 continue;
1991 }
1992 const auto descriptorType = (VkDescriptorType)ref.binding.descriptorType;
1993 const uint32_t arrayOffset = ref.arrayOffset;
1994 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
1995 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1996 // first is the ref, starting from 1 we use array offsets
1997 const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].desc.resource;
1998 if (const GpuImageVk* resPtr = gpuResourceMgr.GetImage<GpuImageVk>(bRes.handle); resPtr) {
1999 VkSampler sampler = VK_NULL_HANDLE;
2000 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2001 const GpuSamplerVk* samplerPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
2002 if (samplerPtr) {
2003 sampler = samplerPtr->GetPlatformData().sampler;
2004 }
2005 }
2006 const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
2007 const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
2008 VkImageView imageView = platImage.imageView;
2009 if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2010 (bRes.layer < platImageViews.layerImageViews.size())) {
2011 imageView = platImageViews.layerImageViews[bRes.layer];
2012 } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
2013 if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2014 (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
2015 imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
2016 } else if (bRes.mip < platImageViews.mipImageViews.size()) {
2017 imageView = platImageViews.mipImageViews[bRes.mip];
2018 }
2019 }
2020 wd.descriptorImageInfos[dsud.imageIndex + idx] = {
2021 sampler, // sampler
2022 imageView, // imageView
2023 (VkImageLayout)bRes.imageLayout, // imageLayout
2024 };
2025 }
2026 }
2027 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2028 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2029 nullptr, // pNext
2030 descriptorSet->descriptorSet, // dstSet
2031 ref.binding.binding, // dstBinding
2032 0, // dstArrayElement
2033 descriptorCount, // descriptorCount
2034 descriptorType, // descriptorType
2035 &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
2036 nullptr, // pBufferInfo
2037 nullptr, // pTexelBufferView
2038 };
2039 dsud.imageIndex += descriptorCount;
2040 }
2041 for (const auto& refSam : samplers) {
2042 const auto& ref = refSam.desc;
2043 const uint32_t descriptorCount = ref.binding.descriptorCount;
2044 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
2045 if (descriptorCount == 0) {
2046 continue;
2047 }
2048 const uint32_t arrayOffset = ref.arrayOffset;
2049 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
2050 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2051 // first is the ref, starting from 1 we use array offsets
2052 const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].desc.resource;
2053 if (const GpuSamplerVk* resPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
2054 const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
2055 wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
2056 platSampler.sampler, // sampler
2057 VK_NULL_HANDLE, // imageView
2058 VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
2059 };
2060 }
2061 }
2062 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2063 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2064 nullptr, // pNext
2065 descriptorSet->descriptorSet, // dstSet
2066 ref.binding.binding, // dstBinding
2067 0, // dstArrayElement
2068 descriptorCount, // descriptorCount
2069 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2070 &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
2071 nullptr, // pBufferInfo
2072 nullptr, // pTexelBufferView
2073 };
2074 dsud.samplerIndex += descriptorCount;
2075 }
2076 #if (RENDER_PERF_ENABLED == 1)
2077 // count the actual updated descriptor sets, not the API calls
2078 if (stateCache) {
2079 stateCache->perfCounters.updateDescriptorSetCount++;
2080 }
2081 #endif
2082 }
2083 }
2084 } // namespace
2085
2086 void RenderBackendVk::UpdateGlobalDescriptorSets()
2087 {
2088 RENDER_CPU_PERF_SCOPE("UpdateGlobalDescriptorSets", "");
2089
2090 auto& dsMgr = (DescriptorSetManagerVk&)device_.GetDescriptorSetManager();
2091 LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
2092 const auto& allDescSets = dsMgr.GetUpdateDescriptorSetHandles();
2093 const uint32_t upDescriptorSetCount =
2094 static_cast<uint32_t>(Math::min(allDescSets.size(), wd.writeDescriptorSets.size()));
2095 DescriptorSetUpdateDataStruct dsud;
2096
2097 for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
2098 if (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET) {
2099 continue;
2100 }
2101 const RenderHandle descHandle = allDescSets[descIdx];
2102 // first update gpu descriptor indices
2103 dsMgr.UpdateDescriptorSetGpuHandle(descHandle);
2104
2105 const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
2106 const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);
2107
2108 UpdateSingleDescriptorSet(gpuResourceMgr_, nullptr, descriptorSet, bindingResources, wd, dsud);
2109
2110 // NOTE: should update perf counters
2111 }
2112
2113 // update if the batch ended or we are the last descriptor set
2114 if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
2115 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2116 vkUpdateDescriptorSets(device, // device
2117 dsud.writeBindIdx, // descriptorWriteCount
2118 wd.writeDescriptorSets.data(), // pDescriptorWrites
2119 0, // descriptorCopyCount
2120 nullptr); // pDescriptorCopies
2121 }
2122 }
2123
2124 void RenderBackendVk::UpdateCommandListDescriptorSets(
2125 const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
2126 {
2127 auto& dsMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
2128
2129 const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
2130 const auto upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
2131 LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
2132 DescriptorSetUpdateDataStruct dsud;
2133 for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
2134 if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
2135 (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
2136 continue;
2137 }
2138
2139 const RenderHandle descHandle = allDescSets[descIdx];
2140 // first update gpu descriptor indices
2141 dsMgr.UpdateDescriptorSetGpuHandle(descHandle);
2142
2143 const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
2144 const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);
2145
2146 UpdateSingleDescriptorSet(gpuResourceMgr_, &stateCache, descriptorSet, bindingResources, wd, dsud);
2147 }
2148 // update if the batch ended or we are the last descriptor set
2149 if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
2150 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2151 vkUpdateDescriptorSets(device, // device
2152 dsud.writeBindIdx, // descriptorWriteCount
2153 wd.writeDescriptorSets.data(), // pDescriptorWrites
2154 0, // descriptorCopyCount
2155 nullptr); // pDescriptorCopies
2156 }
2157 }
2158
2159 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
2160 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2161 StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
2162 {
2163 const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
2164
2165 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2166 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
2167 const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
2168 ? VK_PIPELINE_BIND_POINT_COMPUTE
2169 : VK_PIPELINE_BIND_POINT_GRAPHICS;
2170 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2171
2172 bool valid = (pipelineLayout != VK_NULL_HANDLE);
2173 const uint32_t firstSet = renderCmd.firstSet;
2174 const uint32_t setCount = renderCmd.setCount;
2175 if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
2176 uint32_t dynamicOffsetDescriptorSetIndices = 0;
2177 uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
2178
2179 VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2180 const uint32_t firstPlusCount = firstSet + setCount;
2181 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2182 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2183 if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
2184 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
2185 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
2186
2187 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
2188 if (descriptorSet && descriptorSet->descriptorSet) {
2189 descriptorSets[idx] = descriptorSet->descriptorSet;
2190 // update, copy to state cache
2191 PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
2192 stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
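// each set reserves 16 bits of pipelineDescSetHash; clear this set's bits and mix in the immutable sampler bitmask
// so that a mismatch below triggers PSO re-creation with the new set layouts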
2193 const uint32_t currShift = (idx * 16u);
2194 const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
2195 uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
2196 stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
2197 } else {
2198 valid = false;
2199 }
2200 }
2201 }
2202
2203 uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
2204 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2205 uint32_t dynamicOffsetIdx = 0;
2206 // NOTE: optimize
2207 // this code has safety checks so that offsets are not updated for non-dynamic sets
2208 // they could be enabled only for validation builds
2209 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2210 if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
2211 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2212 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
2213 const auto dodResCount = static_cast<uint32_t>(dod.resources.size());
2214 const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
2215 for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
2216 uint32_t byteOffset = 0U;
2217 if (descriptorSetDynamicOffsets.dynamicOffsets &&
2218 (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount)) {
2219 byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
2220 }
2221 dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
2222 }
2223 }
2224 }
2225
2226 stateCache.validBindings = valid;
2227 if (stateCache.validBindings) {
2228 if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
2229 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
2230 pipelineBindPoint, // pipelineBindPoint
2231 pipelineLayout, // layout
2232 firstSet, // firstSet
2233 setCount, // descriptorSetCount
2234 &descriptorSets[firstSet], // pDescriptorSets
2235 dynamicOffsetIdx, // dynamicOffsetCount
2236 dynamicOffsets); // pDynamicOffsets
2237 #if (RENDER_PERF_ENABLED == 1)
2238 stateCache.perfCounters.bindDescriptorSetCount++;
2239 #endif
2240 } else {
2241 // possible pso re-creation and bind of these sets to the new pso
2242 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
2243 (PipelineBindPoint)pipelineBindPoint };
2244 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
2245 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
2246 }
2247 }
2248 }
2249 }
2250
2251 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2252 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2253 {
2254 PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
2255 PLUGIN_ASSERT(renderCmd.data);
2256
2257 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2258 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2259
2260 const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0));
2261 PLUGIN_ASSERT(valid);
2262
2263 if (valid) {
2264 const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
2265 vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
2266 pipelineLayout, // layout
2267 shaderStageFlags, // stageFlags
2268 0, // offset
2269 renderCmd.pushConstant.byteSize, // size
2270 static_cast<void*>(renderCmd.data)); // pValues
2271 }
2272 }
2273
2274 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
2275 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2276 const StateCache& stateCache)
2277 {
2278 #if (RENDER_VULKAN_RT_ENABLED == 1)
2279 // NOTE: missing
2280 const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.dstAccelerationStructure);
2281 const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
2282 if ((!dst) || (!scratchBuffer)) {
2283 return; // early out
2284 }
2285 const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
2286 const VkDevice device = devicePlat.device;
2287
2288 const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
2289 const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
2290
2291 // scratch data with user offset
2292 const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
2293 VkDeviceSize(renderCmd.scratchOffset) };
2294
2295 const size_t arraySize =
2296 renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
2297 vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
2298 vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
2299
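// fill one geometry description and one build range per triangle, AABB, and instance view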
2300 size_t arrayIndex = 0;
2301 for (const auto& trianglesRef : renderCmd.trianglesView) {
2302 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2303 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2304 nullptr, // pNext
2305 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
2306 {}, // geometry;
2307 0, // flags
2308 };
2309 uint32_t primitiveCount = 0;
2310 const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
2311 const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
2312 if (vb && ib) {
2313 const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
2314 device, vb->GetPlatformData().buffer) };
2315 const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
2316 device, ib->GetPlatformData().buffer) };
2317 VkDeviceOrHostAddressConstKHR transformData {};
2318 if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
2319 if (const GpuBufferVk* tr =
2320 gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
2321 tr) {
2322 transformData.deviceAddress = { GetBufferDeviceAddress(device, tr->GetPlatformData().buffer) };
2323 }
2324 }
2325 primitiveCount = trianglesRef.info.indexCount / 3u; // triangles
2326
2327 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2328 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
2329 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
2330 nullptr, // pNext
2331 VkFormat(trianglesRef.info.vertexFormat), // vertexFormat
2332 vertexData, // vertexData
2333 VkDeviceSize(trianglesRef.info.vertexStride), // vertexStride
2334 trianglesRef.info.maxVertex, // maxVertex
2335 VkIndexType(trianglesRef.info.indexType), // indexType
2336 indexData, // indexData
2337 transformData, // transformData
2338 };
2339 }
2340 buildRangeInfos[arrayIndex] = {
2341 primitiveCount, // primitiveCount
2342 0u, // primitiveOffset
2343 0u, // firstVertex
2344 0u, // transformOffset
2345 };
2346 arrayIndex++;
2347 }
2348 for (const auto& aabbsRef : renderCmd.aabbsView) {
2349 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2350 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2351 nullptr, // pNext
2352 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
2353 {}, // geometry;
2354 0, // flags
2355 };
2356 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2357 if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle); iPtr) {
2358 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2359 }
2360 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2361 geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
2362 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
2363 nullptr, // pNext
2364 deviceAddress, // data
2365 aabbsRef.info.stride, // stride
2366 };
2367 buildRangeInfos[arrayIndex] = {
2368 1u, // primitiveCount
2369 0u, // primitiveOffset
2370 0u, // firstVertex
2371 0u, // transformOffset
2372 };
2373 arrayIndex++;
2374 }
2375 for (const auto& instancesRef : renderCmd.instancesView) {
2376 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2377 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2378 nullptr, // pNext
2379 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
2380 {}, // geometry;
2381 0, // flags
2382 };
2383 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2384 if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle); iPtr) {
2385 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2386 }
2387 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2388 geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2389 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2390 nullptr, // pNext
2391 instancesRef.info.arrayOfPointers, // arrayOfPointers
2392 deviceAddress, // data
2393 };
2394 buildRangeInfos[arrayIndex] = {
2395 1u, // primitiveCount
2396 0u, // primitiveOffset
2397 0u, // firstVertex
2398 0u, // transformOffset
2399 };
2400 arrayIndex++;
2401 }
2402
2403 const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2404 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2405 nullptr, // pNext
2406 VkAccelerationStructureTypeKHR(renderCmd.type), // type
2407 VkBuildAccelerationStructureFlagsKHR(renderCmd.flags), // flags
2408 VkBuildAccelerationStructureModeKHR(renderCmd.mode), // mode
2409 VK_NULL_HANDLE, // srcAccelerationStructure
2410 dstAs, // dstAccelerationStructure
2411 uint32_t(arrayIndex), // geometryCount
2412 geometryData.data(), // pGeometries
2413 nullptr, // ppGeometries
2414 scratchData, // scratchData
2415 };
2416
2417 vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2418 for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2419 buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2420 }
2421 const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2422 if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2423 extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2424 1u, // infoCount
2425 &buildGeometryInfo, // pInfos
2426 buildRangeInfosPtr.data()); // ppBuildRangeInfos
2427 }
2428 #endif
2429 }
2430
2431 void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
2432 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2433 const StateCache& stateCache)
2434 {
2435 const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
2436 // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
2437 const auto imageLayout = (VkImageLayout)renderCmd.imageLayout;
2438 PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
2439 if (imagePtr) {
2440 const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
2441 if (platImage.image) {
2442 VkClearColorValue clearColor;
2443 PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
2444 CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));
2445
2446 // NOTE: temporary vector allocated due to not having max limit
2447 vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
2448 for (size_t idx = 0; idx < ranges.size(); ++idx) {
2449 const auto& inputRef = renderCmd.ranges[idx];
2450 ranges[idx] = {
2451 (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
2452 inputRef.baseMipLevel, // baseMipLevel
2453 inputRef.levelCount, // levelCount
2454 inputRef.baseArrayLayer, // baseArrayLayer
2455 inputRef.layerCount, // layerCount
2456 };
2457 }
2458
2459 vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
2460 platImage.image, // image
2461 imageLayout, // imageLayout
2462 &clearColor, // pColor
2463 static_cast<uint32_t>(ranges.size()), // rangeCount
2464 ranges.data()); // pRanges
2465 }
2466 }
2467 }

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ViewportDesc& vd = renderCmd.viewportDesc;

    VkViewport vp {
        vd.x, // x
        vd.y, // y
        vd.width, // width
        vd.height, // height
        vd.minDepth, // minDepth
        vd.maxDepth, // maxDepth
    };
    // handle viewport for surface transform
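    // When the swapchain surface is pre-rotated, remap the viewport into the rotated
    // framebuffer coordinate space (width and height swap for 90/270 degree rotations).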
    const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
    if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
        if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
            vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.height - vd.y;
            vp.y = vd.x;
            vp.width = vd.height;
            vp.height = vd.width;
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
            vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.width - vd.x;
            vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.height - vd.y;
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
            vp.x = vd.y;
            vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.width - vd.x;
            vp.width = vd.height;
            vp.height = vd.width;
        }
    }

    vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
        0, // firstViewport
        1, // viewportCount
        &vp); // pViewports
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ScissorDesc& sd = renderCmd.scissorDesc;

    VkRect2D sc {
        { sd.offsetX, sd.offsetY }, // offset
        { sd.extentWidth, sd.extentHeight }, // extent
    };
    // handle scissor for surface transform
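    // Same pre-rotation handling as for the viewport above: the scissor rectangle is
    // remapped into the rotated framebuffer coordinate space.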
    const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
    if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
        if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
            sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.height - sc.offset.y, sc.offset.x },
                { sc.extent.height, sc.extent.width } };
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
            sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.width - sc.offset.x,
                       (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.height - sc.offset.y },
                { sc.extent.width, sc.extent.height } };
        } else if (rpd.surfaceTransformFlags == CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
            sc = { { sc.offset.y, (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.width - sc.offset.x },
                { sc.extent.height, sc.extent.width } };
        }
    }

    vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
        0, // firstScissor
        1, // scissorCount
        &sc); // pScissors
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.lineWidth); // lineWidth
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.depthBiasConstantFactor, // depthBiasConstantFactor
        renderCmd.depthBiasClamp, // depthBiasClamp
        renderCmd.depthBiasSlopeFactor); // depthBiasSlopeFactor
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.blendConstants); // blendConstants[4]
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.minDepthBounds, // minDepthBounds
        renderCmd.maxDepthBounds); // maxDepthBounds
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const auto stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;

    if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
        vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask, // faceMask
            renderCmd.mask); // compareMask
    } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
        vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask, // faceMask
            renderCmd.mask); // writeMask
    } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
        vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask, // faceMask
            renderCmd.mask); // reference
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
    if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
        const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
        const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
        };

        extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
            &fragmentSize, // pFragmentSize
            combinerOps); // combinerOps
    }
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (stateCache.backendNode) {
        const RenderBackendRecordingStateVk recordingState = {
            {},
            cmdBuf.commandBuffer, // commandBuffer
            stateCache.lowLevelRenderPassData.renderPass, // renderPass
            stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
            stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
            stateCache.lowLevelRenderPassData.subpassIndex, // subpassIndex
            stateCache.pipelineLayout, // pipelineLayout
        };
        const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
        stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT_MSG(false, "not implemented");

    const auto pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
    const uint32_t queryIndex = renderCmd.queryIndex;
    VkQueryPool queryPool = VK_NULL_HANDLE;

    vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
        queryPool, // queryPool
        queryIndex, // firstQuery
        1); // queryCount

    vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
        pipelineStageFlagBits, // pipelineStage
        queryPool, // queryPool
        queryIndex); // query
}

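// Records the final layout transition of the frame's swapchain images to PRESENT_SRC_KHR
// in the render node command list whose index matches cmdBufferIdx.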
void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
{
    for (auto& presRef : presentationData_.infos) {
        if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
            continue;
        }

        PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);

        const GpuResourceState& state = presRef.renderGraphProcessedState;
        const auto srcAccessMask = (VkAccessFlags)state.accessFlags;
        const auto dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
        const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
                                                  (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
        const auto oldLayout = (VkImageLayout)presRef.imageLayout;
        const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
        // NOTE: queue is not currently checked (should be in the same queue as last time used)
        constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
        constexpr VkImageSubresourceRange imageSubresourceRange {
            VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
            0, // baseMipLevel
            1, // levelCount
            0, // baseArrayLayer
            1, // layerCount
        };

        const VkImageMemoryBarrier imageMemoryBarrier {
            VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
            nullptr, // pNext
            srcAccessMask, // srcAccessMask
            dstAccessMask, // dstAccessMask
            oldLayout, // oldLayout
            newLayout, // newLayout
            srcQueueFamilyIndex, // srcQueueFamilyIndex
            dstQueueFamilyIndex, // dstQueueFamilyIndex
            presRef.swapchainImage, // image
            imageSubresourceRange, // subresourceRange
        };

        vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
            srcStageMask, // srcStageMask
            dstStageMask, // dstStageMask
            dependencyFlags, // dependencyFlags
            0, // memoryBarrierCount
            nullptr, // pMemoryBarriers
            0, // bufferMemoryBarrierCount
            nullptr, // pBufferMemoryBarriers
            1, // imageMemoryBarrierCount
            &imageMemoryBarrier); // pImageMemoryBarriers

        presRef.presentationLayoutChangeNeeded = false;
        presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
    }
}

#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
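// Debug labels are only recorded when the VK_EXT_debug_utils entry points are available.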
void RenderBackendVk::BeginDebugMarker(
    const LowLevelCommandBufferVk& cmdBuf, const BASE_NS::string_view name, const Math::Vec4 color)
{
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
        const VkDebugUtilsLabelEXT label {
            VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
            nullptr, // pNext
            name.data(), // pLabelName
            { color.x, color.y, color.z, color.w } // color[4]
        };
        deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuf.commandBuffer, &label);
    }
}

void RenderBackendVk::EndDebugMarker(const LowLevelCommandBufferVk& cmdBuf)
{
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
        deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuf.commandBuffer);
    }
}
#endif

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
void RenderBackendVk::RenderCommand(const RenderCommandBeginDebugMarker& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    BeginDebugMarker(cmdBuf, renderCmd.name, renderCmd.color);
}

void RenderBackendVk::RenderCommand(const RenderCommandEndDebugMarker& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    EndDebugMarker(cmdBuf);
}
#endif

#if (RENDER_PERF_ENABLED == 1)

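// Creates per-render-node-context timers on first use and advances the ring-buffered
// offset used for this frame's GPU timestamp query results.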
void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
            constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
            perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
    perfGpuTimerData_.currentOffset =
        (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
#endif
}

void RenderBackendVk::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in micros
    fullGpuTime = perfGpuTimerData_.fullGpuCounter;
    perfGpuTimerData_.fullGpuCounter = 0;

    perfGpuTimerData_.gpuBuffer->Unmap();
#endif
    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
        perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);

        CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
        CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
        CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
        CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
        CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
        CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
    }
    // go through and count combined draw counts for tracing
    PerfCounters counters;
    for (auto& timer : timers_) {
        CopyPerfCounters(timer.second.perfCounters, counters);
        timer.second.perfCounters = {}; // reset perf counters
    }

    CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(counters.drawCount));
    CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(counters.drawIndirectCount));
    CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(counters.dispatchCount));
    CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(counters.dispatchIndirectCount));
    CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(counters.renderPassCount));
    CORE_PROFILER_PLOT("Bind pipeline count", static_cast<int64_t>(counters.bindPipelineCount));
    CORE_PROFILER_PLOT("Bind descriptor set count", static_cast<int64_t>(counters.bindDescriptorSetCount));
    CORE_PROFILER_PLOT("Update descriptor set count", static_cast<int64_t>(counters.updateDescriptorSetCount));
    CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(counters.instanceCount));
    CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(counters.triangleCount));
}

void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
    const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (stateCache.secondaryCommandBuffer) {
        return; // cannot be called inside render pass (e.g. with secondary command buffers)
    }
    PLUGIN_ASSERT(timers_.count(name) == 1);
    const PerfDataSet* perfDataSet = &timers_[name];
    if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
        const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
        if (platData.queryPool) {
            vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
                platData.queryPool, // queryPool
                queryIndex, // firstQuery
                1); // queryCount

            vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
                stageFlagBits, // pipelineStage
                platData.queryPool, // queryPool
                queryIndex); // query
        }
    }
#endif
}


namespace {
void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
{
    perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
    perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
    perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
    perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
    perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
    perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
    perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
    perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
    perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
    perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
}
} // namespace

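// Reads back the timestamps written in an earlier frame from the mapped ring buffer,
// reports CPU/GPU timings to the performance data manager, and records a copy of the
// current query results into the GPU buffer for a later read-back.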
void RenderBackendVk::CopyPerfTimeStamp(
    const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
{
    PLUGIN_ASSERT(timers_.count(name) == 1);
    PerfDataSet* const perfDataSet = &timers_[name];

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // take data from earlier queries to cpu
    // and copy in from query to gpu buffer
    const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
    int64_t gpuMicroSeconds = 0;
    {
        auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
        auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);

        const uint64_t startStamp = *currentData;
        const uint64_t endStamp = *(currentData + 1);

        const double timestampPeriod =
            static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                                    .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
        constexpr int64_t nanosToMicrosDivisor { 1000 };
        gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
        constexpr int64_t maxValidMicroSecondValue { 4294967295 };
        if (gpuMicroSeconds > maxValidMicroSecondValue) {
            gpuMicroSeconds = 0;
        }
        perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();

    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);

        // cannot be called inside render pass (e.g. with secondary command buffers)
        if (!stateCache.secondaryCommandBuffer) {
            if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
                const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());

                const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
                PLUGIN_ASSERT(gpuBuffer);
                const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();

                constexpr uint32_t queryCount = 2;
                constexpr VkDeviceSize queryStride = sizeof(uint64_t);
                constexpr VkQueryResultFlags queryResultFlags =
                    VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;

                if (platData.queryPool) {
                    vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
                        platData.queryPool, // queryPool
                        0, // firstQuery
                        queryCount, // queryCount
                        platBuffer.buffer, // dstBuffer
                        currentFrameByteOffset, // dstOffset
                        queryStride, // stride
                        queryResultFlags); // flags
                }
            }
        }
#endif
        UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
    }
}

#endif
RENDER_END_NAMESPACE()