1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_backend_vk.h"
17
18 #include <algorithm>
19 #include <cstdint>
20 #include <functional>
21 #include <vulkan/vulkan_core.h>
22
23 #include <base/containers/array_view.h>
24 #include <base/containers/fixed_string.h>
25 #include <base/containers/string_view.h>
26 #include <core/implementation_uids.h>
27 #include <core/perf/intf_performance_data_manager.h>
28 #include <core/plugin/intf_class_register.h>
29 #include <render/datastore/render_data_store_render_pods.h>
30 #include <render/device/pipeline_state_desc.h>
31 #include <render/namespace.h>
32 #include <render/nodecontext/intf_render_backend_node.h>
33 #include <render/vulkan/intf_device_vk.h>
34
35 #include "perf/cpu_perf_scope.h"
36 #if (RENDER_PERF_ENABLED == 1)
37 #include "perf/gpu_query.h"
38 #include "perf/gpu_query_manager.h"
39 #include "vulkan/gpu_query_vk.h"
40 #endif
41
42 #include "device/gpu_resource_handle_util.h"
43 #include "device/gpu_resource_manager.h"
44 #include "nodecontext/node_context_descriptor_set_manager.h"
45 #include "nodecontext/node_context_pool_manager.h"
46 #include "nodecontext/node_context_pso_manager.h"
47 #include "nodecontext/render_barrier_list.h"
48 #include "nodecontext/render_command_list.h"
49 #include "nodecontext/render_node_graph_node_store.h"
50 #include "render_backend.h"
51 #include "util/log.h"
52 #include "util/render_frame_util.h"
53 #include "vulkan/gpu_buffer_vk.h"
54 #include "vulkan/gpu_image_vk.h"
55 #include "vulkan/gpu_sampler_vk.h"
56 #include "vulkan/gpu_semaphore_vk.h"
57 #include "vulkan/node_context_descriptor_set_manager_vk.h"
58 #include "vulkan/node_context_pool_manager_vk.h"
59 #include "vulkan/pipeline_state_object_vk.h"
60 #include "vulkan/render_frame_sync_vk.h"
61 #include "vulkan/swapchain_vk.h"
62 #include "vulkan/validate_vk.h"
63
64 using namespace BASE_NS;
65
66 using CORE_NS::GetInstance;
67 using CORE_NS::IParallelTaskQueue;
68 using CORE_NS::IPerformanceDataManager;
69 using CORE_NS::IPerformanceDataManagerFactory;
70 using CORE_NS::ITaskQueueFactory;
71 using CORE_NS::IThreadPool;
72
73 RENDER_BEGIN_NAMESPACE()
74 namespace {
75 #if (RENDER_PERF_ENABLED == 1)
76 void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
77 {
78 dst.drawCount += src.drawCount;
79 dst.drawIndirectCount += src.drawIndirectCount;
80 dst.dispatchCount += src.dispatchCount;
81 dst.dispatchIndirectCount += src.dispatchIndirectCount;
82 dst.bindPipelineCount += src.bindPipelineCount;
83 dst.renderPassCount += src.renderPassCount;
84 dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
85 dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
86 dst.triangleCount += src.triangleCount;
87 dst.instanceCount += src.instanceCount;
88 }
89 #endif
90
91 inline void ProcessBackendPositionCommands(IDevice& device, const RenderBackendCommandPosition position,
92 const array_view<const ProcessBackendCommand> commands)
93 {
94 for (const auto& ref : commands) {
95 if ((position == ref.backendCommandPosition) && ref.command) {
96 ref.command->ExecuteBackendCommand(device);
97 }
98 }
99 }
100 } // namespace
101
102 // Helper class for running std::function as a ThreadPool task.
103 class FunctionTask final : public IThreadPool::ITask {
104 public:
105 static Ptr Create(std::function<void()> func)
106 {
107 return Ptr { new FunctionTask(BASE_NS::move(func)) };
108 }
109
110 explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {};
111
112 void operator()() override
113 {
114 func_();
115 }
116
117 protected:
118 void Destroy() override
119 {
120 delete this;
121 }
122
123 private:
124 std::function<void()> func_;
125 };
126
127 #if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
128 namespace {
129 static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
130 }
131 #endif
132
133 RenderBackendVk::RenderBackendVk(Device& dev, GpuResourceManager& gpuResourceManager, CORE_NS::ITaskQueue* const queue)
134 : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
135 queue_(queue)
136 {
137 #if (RENDER_PERF_ENABLED == 1)
138 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
139 gpuQueryMgr_ = make_unique<GpuQueryManager>();
140
141 constexpr uint32_t maxQueryObjectCount { 512u };
142 constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
143 const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
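// host-visible readback buffer for GPU timestamp query results; frameByteSize covers one frame,
// fullByteSize covers the whole ring of buffered frames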
144 const GpuBufferDesc desc {
145 BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT, // usageFlags
146 CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
147 0, // engineCreationFlags
148 fullByteSize, // byteSize
149 };
150 perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
151 perfGpuTimerData_.currentOffset = 0;
152 perfGpuTimerData_.frameByteSize = byteSize;
153 perfGpuTimerData_.fullByteSize = fullByteSize;
154 { // zero initialize
155 uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
156 memset_s(bufferData, fullByteSize, 0, fullByteSize);
157 perfGpuTimerData_.gpuBuffer->Unmap();
158 }
159 #endif
160 #endif
161 }
162
163 void RenderBackendVk::AcquirePresentationInfo(
164 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
165 {
166 RENDER_CPU_PERF_SCOPE("AcquirePresentationInfo", "");
167 if (device_.HasSwapchain()) {
168 presentationData_.present = true;
169 // resized to the same size for convenience
170 presentationData_.infos.resize(backBufferConfig.swapchainData.size());
171 for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
172 const auto& swapData = backBufferConfig.swapchainData[swapIdx];
173 PresentationInfo pi;
174 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
175
176 if (const auto* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
177 swapchain) {
178 const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
179 const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
180 const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
181 PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
182 pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
183 pi.swapchain = platSwapchain.swapchain;
184 pi.useSwapchain = true;
185 // NOTE: for legacy default backbuffer reasons the same swapchain might appear multiple times ATM
186 for (const auto& piRef : presentationData_.infos) {
187 if (piRef.swapchain == pi.swapchain) {
188 pi.useSwapchain = false;
189 }
190 }
191 // NOTE: do not re-acquire the default backbuffer swapchain if it is already in use with a different handle
192 if (pi.useSwapchain) {
193 const VkResult result = vkAcquireNextImageKHR(device, // device
194 vkSwapchain, // swapchain
195 UINT64_MAX, // timeout
196 pi.swapchainSemaphore, // semaphore
197 (VkFence) nullptr, // fence
198 &pi.swapchainImageIndex); // pImageIndex
199
200 switch (result) {
201 // Success
202 case VK_SUCCESS:
203 case VK_TIMEOUT:
204 case VK_NOT_READY:
205 case VK_SUBOPTIMAL_KHR:
206 pi.validAcquire = true;
207 break;
208
209 // Failure
210 case VK_ERROR_OUT_OF_HOST_MEMORY:
211 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
212 PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
213 return;
214 case VK_ERROR_DEVICE_LOST:
215 PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
216 return;
217 case VK_ERROR_OUT_OF_DATE_KHR:
218 PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
219 return;
220 case VK_ERROR_SURFACE_LOST_KHR:
221 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
222 return;
223
224 case VK_EVENT_SET:
225 case VK_EVENT_RESET:
226 case VK_INCOMPLETE:
227 case VK_ERROR_INITIALIZATION_FAILED:
228 case VK_ERROR_MEMORY_MAP_FAILED:
229 case VK_ERROR_LAYER_NOT_PRESENT:
230 case VK_ERROR_EXTENSION_NOT_PRESENT:
231 case VK_ERROR_FEATURE_NOT_PRESENT:
232 case VK_ERROR_INCOMPATIBLE_DRIVER:
233 case VK_ERROR_TOO_MANY_OBJECTS:
234 case VK_ERROR_FORMAT_NOT_SUPPORTED:
235 case VK_ERROR_FRAGMENTED_POOL:
236 case VK_ERROR_OUT_OF_POOL_MEMORY:
237 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
238 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
239 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
240 case VK_ERROR_VALIDATION_FAILED_EXT:
241 case VK_ERROR_INVALID_SHADER_NV:
242 // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
243 case VK_ERROR_FRAGMENTATION_EXT:
244 case VK_ERROR_NOT_PERMITTED_EXT:
245 // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
246 case VK_RESULT_MAX_ENUM:
247 default:
248 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
249 PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
250 device_.SetDeviceStatus(false);
251 break;
252 }
253
254 if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
255 PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
256 static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
257 }
258
259 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
260 const RenderHandle handle = swapchainData.remappableSwapchainImage;
261 if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
262 // remap image to backbuffer
263 const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
264 // special swapchain remapping
265 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
266 }
267 pi.renderGraphProcessedState = swapData.backBufferState;
268 pi.imageLayout = swapData.layout;
269 if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
270 pi.presentationLayoutChangeNeeded = true;
271 pi.renderNodeCommandListIndex =
272 static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);
273
274 if (const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle); swapImage) {
275 pi.swapchainImage = swapImage->GetPlatformData().image;
276 }
277 }
278 }
279 }
280 presentationData_.infos[swapIdx] = pi;
281 }
282 }
283 }
284
285 void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
286 {
287 if (!queue_) {
288 return;
289 }
290 // before presentation commands
291 ProcessBackendPositionCommands(device_, RenderBackendCommandPosition::BEFORE_PRESENTATION, processBackendCommands_);
292
293 if (!backBufferConfig.swapchainData.empty()) {
294 if (device_.HasSwapchain() && presentationData_.present) {
295 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
296 uint32_t swapchainCount = 0U;
297 VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
298 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
299 uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
300 for (const auto& presRef : presentationData_.infos) {
301 // NOTE: the default backbuffer might appear multiple times
302 // the flag useSwapchain should be false in these cases
303 if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
304 PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
305 vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
306 vkSwapchains[swapchainCount++] = presRef.swapchain;
307 }
308 }
309 #if (RENDER_PERF_ENABLED == 1)
310 commonCpuTimers_.present.Begin();
311 #endif
312
313 // NOTE: currently waits for the last valid submission semaphore (backtraces here for valid
314 // semaphore)
315 if (swapchainCount > 0U) {
316 VkSemaphore waitSemaphore = VK_NULL_HANDLE;
317 uint32_t waitSemaphoreCount = 0;
318 if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
319 waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
320 waitSemaphoreCount = 1;
321 }
322
323 const VkPresentInfoKHR presentInfo {
324 VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
325 nullptr, // pNext
326 waitSemaphoreCount, // waitSemaphoreCount
327 &waitSemaphore, // pWaitSemaphores
328 swapchainCount, // swapchainCount
329 vkSwapchains, // pSwapchains
330 vkSwapImageIndices, // pImageIndices
331 nullptr // pResults
332 };
333
334 const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
335 const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
336 &presentInfo); // pPresentInfo
337
338 switch (result) {
339 // Success
340 case VK_SUCCESS:
341 break;
342 case VK_SUBOPTIMAL_KHR:
343 #if (RENDER_VALIDATION_ENABLED == 1)
344 PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
345 #endif
346 break;
347
348 // Failure
349 case VK_ERROR_OUT_OF_HOST_MEMORY:
350 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
351 PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
352 return;
353 case VK_ERROR_DEVICE_LOST:
354 PLUGIN_LOG_E("vkQueuePresentKHR device lost");
355 return;
356 case VK_ERROR_OUT_OF_DATE_KHR:
357 PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
358 return;
359 case VK_ERROR_SURFACE_LOST_KHR:
360 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
361 return;
362
363 case VK_NOT_READY:
364 case VK_TIMEOUT:
365 case VK_EVENT_SET:
366 case VK_EVENT_RESET:
367 case VK_INCOMPLETE:
368 case VK_ERROR_INITIALIZATION_FAILED:
369 case VK_ERROR_MEMORY_MAP_FAILED:
370 case VK_ERROR_LAYER_NOT_PRESENT:
371 case VK_ERROR_EXTENSION_NOT_PRESENT:
372 case VK_ERROR_FEATURE_NOT_PRESENT:
373 case VK_ERROR_INCOMPATIBLE_DRIVER:
374 case VK_ERROR_TOO_MANY_OBJECTS:
375 case VK_ERROR_FORMAT_NOT_SUPPORTED:
376 case VK_ERROR_FRAGMENTED_POOL:
377 case VK_ERROR_OUT_OF_POOL_MEMORY:
378 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
379 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
380 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
381 case VK_ERROR_VALIDATION_FAILED_EXT:
382 case VK_ERROR_INVALID_SHADER_NV:
383 case VK_ERROR_FRAGMENTATION_EXT:
384 case VK_ERROR_NOT_PERMITTED_EXT:
385 case VK_RESULT_MAX_ENUM:
386 default:
387 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
388 PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
389 break;
390 }
391 }
392 #if (RENDER_PERF_ENABLED == 1)
393 commonCpuTimers_.present.End();
394 #endif
395 } else {
396 #if (RENDER_VALIDATION_ENABLED == 1)
397 PLUGIN_LOG_ONCE_E(
398 "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
399 #endif
400 }
401 }
402
403 // after presentation backend commands
404 ProcessBackendPositionCommands(device_, RenderBackendCommandPosition::AFTER_PRESENTATION, processBackendCommands_);
405
406 // clear after presentation (also cleared at the start of the render backend frame)
407 processBackendCommands_.clear();
408 }
409
410 void RenderBackendVk::Render(
411 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
412 {
413 if (!queue_) {
414 return;
415 }
416
417 // NOTE: all command lists are validated before entering here
418 #if (RENDER_PERF_ENABLED == 1)
419 commonCpuTimers_.full.Begin();
420 commonCpuTimers_.acquire.Begin();
421 #endif
422
423 // clear backend commands
424 processBackendCommands_.clear();
425
426 commandBufferSubmitter_ = {};
427 commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());
428
429 presentationData_.present = false;
430 presentationData_.infos.clear();
431
432 #if (RENDER_PERF_ENABLED == 1)
433 commonCpuTimers_.acquire.End();
434
435 StartFrameTimers(renderCommandFrameData);
436 commonCpuTimers_.execute.Begin();
437 #endif
438
439 // global begin backend frame
440 auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
441 descriptorSetMgr.BeginBackendFrame();
442
443 // command list process loop/execute
444 // first tries to acquire swapchain if needed in a task
445 RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);
446
447 #if (RENDER_PERF_ENABLED == 1)
448 commonCpuTimers_.execute.End();
449 commonCpuTimers_.submit.Begin();
450 #endif
451
452 PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
453 // submit vulkan command buffers
454 // checks that presentation info has valid acquire
455 RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);
456
457 #if (RENDER_PERF_ENABLED == 1)
458 commonCpuTimers_.submit.End();
459 commonCpuTimers_.full.End();
460 EndFrameTimers();
461 #endif
462 }
463
464 void RenderBackendVk::RenderProcessSubmitCommandLists(
465 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
466 {
467 // NOTE: currently backtraces to final valid command buffer semaphore
468 uint32_t finalCommandBufferSubmissionIndex = ~0u;
469 commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
470 bool swapchainSemaphoreWaited = false;
471 for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
472 --cmdBufferIdx) {
473 if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
474 (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
475 VK_NULL_HANDLE)) {
476 finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
477 break;
478 }
479 }
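// the last context with a valid command buffer and semaphore is the one that signals the frame fence,
// the presentation wait semaphore, and any external GPU signal semaphores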
480
481 for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
482 const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
483 if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
484 continue;
485 }
486
487 const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
488
489 uint32_t waitSemaphoreCount = 0u;
490 constexpr const uint32_t maxWaitSemaphoreCount =
491 PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
492 VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
493 VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
494 for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
495 const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
496 PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());
497
498 VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
499 if (waitSemaphore != VK_NULL_HANDLE) {
500 waitSemaphores[waitSemaphoreCount] = waitSemaphore;
501 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
502 waitSemaphoreCount++;
503 }
504 }
505
506 if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
507 (!presentationData_.infos.empty())) {
508 swapchainSemaphoreWaited = true;
509 // go through all swapchain semaphores
510 for (const auto& presRef : presentationData_.infos) {
511 if (presRef.swapchainSemaphore) {
512 waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
513 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
514 waitSemaphoreCount++;
515 }
516 }
517 }
518
519 uint32_t signalSemaphoreCount = 0u;
520 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
521 constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
522 VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
523 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
524 VkFence fence = VK_NULL_HANDLE;
525 if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
526 // add fence signaling to last submission for frame sync
527 if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
528 fence = frameSync->GetFrameFence().fence;
529 frameSync->FrameFenceIsSignalled();
530 }
531 // signal external semaphores
532 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
533 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
534 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
535 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
536 if (externalSignals.size() == externalSemaphores.size()) {
537 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
538 // signal only semaphores which have not been signaled yet
539 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
540 if (const auto* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get(); gs) {
541 semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
542 externalSignals[sigIdx].signaled = true;
543 }
544 }
545 }
546 }
547 }
548
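// the semaphore of this final submission doubles as the presentation wait semaphore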
549 if (presentationData_.present) {
550 commandBufferSubmitter_.presentationWaitSemaphore =
551 commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
552 semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
553 }
554 // add additional semaphores
555 for (const auto& swapRef : backBufferConfig.swapchainData) {
556 // should have been checked in render graph already
557 if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
558 semaphores[signalSemaphoreCount++] =
559 VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
560 }
561 }
562 } else if (renderContextRef.submitDepencies.signalSemaphore) {
563 semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
564 }
565 PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);
566
567 const VkSubmitInfo submitInfo {
568 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
569 nullptr, // pNext
570 waitSemaphoreCount, // waitSemaphoreCount
571 (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
572 waitSemaphorePipelineStageFlags, // pWaitDstStageMask
573 1, // commandBufferCount
574 &cmdSubmitterRef.commandBuffer, // pCommandBuffers
575 signalSemaphoreCount, // signalSemaphoreCount
576 (signalSemaphoreCount == 0) ? nullptr : semaphores, // pSignalSemaphores
577 };
578
579 const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
580 if (queue) {
581 RENDER_CPU_PERF_SCOPE("vkQueueSubmit", "");
582 VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
583 1, // submitCount
584 &submitInfo, // pSubmits
585 fence)); // fence
586 }
587 }
588 }
589
590 void RenderBackendVk::RenderProcessCommandLists(
591 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
592 {
593 // queue checked in upper level
594
595 const auto cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
596 constexpr uint64_t acquireTaskId { 0xFFFFffff0 };
597 constexpr uint64_t globalDescSetTaskId { 0xFFFFffff1 };
598 bool acquireSubmitted { false };
599 bool globalDescSetSubmitted { false };
600 vector<uint64_t> afterIdentifiers;
601 afterIdentifiers.reserve(2U); // global descriptor sets, and swapchain acquire wait
602 // submit global descset task if needed
603 {
604 auto& descriptorSetMgr = (DescriptorSetManagerVk&)deviceVk_.GetDescriptorSetManager();
605 const auto& allDescSets = descriptorSetMgr.GetUpdateDescriptorSetHandles();
606 if (!allDescSets.empty()) {
607 globalDescSetSubmitted = true;
608 queue_->Submit(globalDescSetTaskId, FunctionTask::Create([this]() { UpdateGlobalDescriptorSets(); }));
609 }
610 }
611 // submit acquire task if needed
612 if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
613 acquireSubmitted = true;
614 queue_->Submit(acquireTaskId, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
615 AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
616 }));
617 }
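// command lists are recorded as parallel tasks; afterIdentifiers makes command lists that touch the
// swapchain wait for the acquire task and, when needed, for the global descriptor set update task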
618 uint64_t secondaryIdx = cmdBufferCount;
619 for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
620 afterIdentifiers.clear();
621 // add wait for acquire if needed
622 if (acquireSubmitted && (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx)) {
623 afterIdentifiers.push_back(acquireTaskId);
624 }
625 // NOTE: idx increase
626 const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
627 const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
628 PLUGIN_ASSERT(mrpData.subpassCount > 0);
629 const uint32_t rcCount = mrpData.subpassCount;
630 if (rcCount > (cmdBufferCount - cmdBufferIdx)) {
631 PLUGIN_LOG_E("Invalid render command context");
632 break; // NOTE: cmdBufferIdx is only advanced at the end of the loop, so continue here would never terminate
633 }
634
635 // add backend position commands
636 const auto& backendCommands = ref.renderCommandList->GetProcessBackendCommands();
637 processBackendCommands_.append(backendCommands.begin(), backendCommands.end());
638
639 // add wait for global descriptor sets if needed
640 // add safety wait for secondary command lists always (NOTE: needs to be further optimized)
641 bool hasGlobalDescriptorSetBindings = false;
642 if (globalDescSetSubmitted) {
643 auto first = renderCommandFrameData.renderCommandContexts.cbegin() + cmdBufferIdx;
644 auto last = first + rcCount;
645 hasGlobalDescriptorSetBindings = std::any_of(first, last, [](const RenderCommandContext& ref) {
646 return ref.renderCommandList->HasGlobalDescriptorSetBindings();
647 });
648 }
649 if (globalDescSetSubmitted && (mrpData.secondaryCmdLists || hasGlobalDescriptorSetBindings)) {
650 afterIdentifiers.push_back(globalDescSetTaskId);
651 }
652 if (mrpData.secondaryCmdLists) {
653 afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
654 for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
655 const uint64_t submitId = secondaryIdx++;
656 afterIdentifiers.push_back(submitId);
657 queue_->SubmitAfter(afterIdentifiers, submitId,
658 FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
659 const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
660 MultiRenderCommandListDesc mrcDesc;
661 mrcDesc.multiRenderCommandListCount = 1u;
662 mrcDesc.baseContext = nullptr;
663 mrcDesc.secondaryCommandBuffer = true;
664 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
665 const DebugNames debugNames { ref2.debugName,
666 renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
667 RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
668 }));
669 }
670 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
671 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
672 MultiRenderCommandListDesc mrcDesc;
673 mrcDesc.multiRenderCommandListCount = rcCount;
674 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
675 const DebugNames debugNames { ref2.debugName, ref2.debugName };
676 RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
677 }));
678 } else {
679 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
680 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
681 MultiRenderCommandListDesc mrcDesc;
682 mrcDesc.multiRenderCommandListCount = rcCount;
683 if (rcCount > 1) {
684 mrcDesc.multiRenderNodeCmdList = true;
685 mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
686 }
687 for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
688 const uint32_t currIdx = cmdBufferIdx + rcIdx;
689 mrcDesc.multiRenderCommandListIndex = rcIdx;
690 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
691 const DebugNames debugNames { ref2.debugName,
692 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
693 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
694 }
695 }));
696 }
697 // idx increase
698 cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
699 }
700
701 // process before acquire commands here
702 ProcessBackendPositionCommands(device_, RenderBackendCommandPosition::BEFORE_ACQUIRE, processBackendCommands_);
703
704 // execute and wait for completion.
705 queue_->Execute();
706 queue_->Clear();
707 }
708
709 void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
710 RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
711 const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
712 {
713 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
714 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
715 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
716
717 const ContextCommandPoolVk& ptrCmdPool =
718 (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
719 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;
720
721 // begin cmd buffer
722 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
723 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
724 const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
725 if (valid) {
726 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
727 ptrCmdPool.commandPool, // commandPool
728 commandPoolResetFlags)); // flags
729 }
730
731 constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
732 VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
733 };
734 const VkCommandBufferBeginInfo commandBufferBeginInfo {
735 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
736 nullptr, // pNext
737 commandBufferUsageFlags, // flags
738 nullptr, // pInheritanceInfo
739 };
740 if (valid) {
741 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
742 &commandBufferBeginInfo)); // pBeginInfo
743 }
744
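// the primary command buffer records the barrier point and the render pass begin/end; the per-subpass
// work is executed from the secondary command buffers in between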
745 StateCache stateCache;
746
747 const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
748 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
749 const auto commandCount = static_cast<uint32_t>(rcRef.size());
750 const RenderCommandBeginRenderPass* rcBeginRenderPass =
751 (mrpcld.rpBeginCmdIndex < commandCount)
752 ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
753 : nullptr;
754 const RenderCommandEndRenderPass* rcEndRenderPass =
755 (mrpcld.rpEndCmdIndex < commandCount)
756 ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
757 : nullptr;
758
759 if (rcBeginRenderPass && rcEndRenderPass) {
760 if (mrpcld.rpBarrierCmdIndex < commandCount) {
761 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
762 PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
763 const RenderCommandBarrierPoint& barrierPoint =
764 *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
765 // handle all barriers before the render command that needs resource syncing
766 RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
767 }
768
769 // begin render pass
770 stateCache.primaryRenderPass = true;
771 RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
772 stateCache.primaryRenderPass = false;
773
774 // get secondary command buffers from correct indices and execute
775 for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
776 const uint32_t currCmdBufIdx = cmdBufIdx + idx;
777 PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
778 const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
779 NodeContextPoolManagerVk& contextPoolVk =
780 *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);
781
782 const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
783 const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
784 const auto mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
785 // next subpass is only begun from the second command list onwards
786 if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
787 RenderCommandBeginRenderPass renderPass =
788 *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
789 renderPass.renderPassDesc.subpassContents =
790 SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
791 stateCache.renderCommandBeginRenderPass = nullptr; // reset
792 RenderCommand(
793 renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
794 }
795 RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
796 }
797
798 // end render pass (replace the primary render pass)
799 stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
800 // NOTE: render graph has batched the subpasses to have END_SUBPASS, we need END_RENDER_PASS
801 constexpr RenderCommandEndRenderPass rcerp = {};
802 RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
803 }
804
805 // end cmd buffer
806 if (valid) {
807 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
808 }
809
810 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
811 }
812
813 void RenderBackendVk::RenderExecuteSecondaryCommandLists(
814 const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
815 {
816 if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
817 vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
818 1u, // commandBufferCount
819 &executeCmdBuffer.commandBuffer); // pCommandBuffers
820 }
821 }
822
823 VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
824 const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
825 {
826 auto& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);
827
828 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
829 const auto cmdCount = static_cast<uint32_t>(rcRef.size());
830
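// secondary command buffers inherit the render pass and subpass resolved from this command list's
// BEGIN_RENDER_PASS command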
831 const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
832 PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
833 PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
834 if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
835 const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
836 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
837 const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
838 LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
839
840 const uint32_t subpass = renderCmd.subpassStartIndex;
841 return VkCommandBufferInheritanceInfo {
842 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
843 nullptr, // pNext
844 lowLevelRenderPassData.renderPass, // renderPass
845 subpass, // subpass
846 VK_NULL_HANDLE, // framebuffer
847 VK_FALSE, // occlusionQueryEnable
848 0, // queryFlags
849 0, // pipelineStatistics
850 };
851 } else {
852 return VkCommandBufferInheritanceInfo {};
853 }
854 }
855
856 void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
857 const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
858 {
859 // these are validated in render graph
860 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
861 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
862 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
863 NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
864 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
865
866 #if (RENDER_PERF_ENABLED == 1)
867 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
868 const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
869 const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
870 #endif
871 PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
872 PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
873 if (perfDataSet) {
874 perfDataSet->cpuTimer.Begin();
875 }
876
877 RENDER_CPU_PERF_SCOPE("RenderSingleCommandList", debugNames.renderCommandBufferName);
878 #endif
879
880 contextPoolMgr.BeginBackendFrame();
881 ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
882 nodeContextPsoMgr.BeginBackendFrame();
883
884 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
885
886 StateCache stateCache = {}; // state cache for this render command list
887 stateCache.backendNode = renderCommandCtx.renderBackendNode;
888 stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;
889
890 // the command buffer has been waited on with a single frame fence
891 const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
892 const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
893 const bool endCommandBuffer =
894 (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
895 const ContextCommandPoolVk* ptrCmdPool = nullptr;
896 if (mrclDesc.multiRenderNodeCmdList) {
897 PLUGIN_ASSERT(mrclDesc.baseContext);
898 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
899 ->GetContextCommandPool();
900 } else if (mrclDesc.secondaryCommandBuffer) {
901 PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
902 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
903 } else {
904 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
905 }
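// NOTE: multi render command list contexts record into the command pool of the base context,
// secondary command lists use their own secondary command pool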
906
907 // update cmd list context descriptor sets
908 UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);
909
910 PLUGIN_ASSERT(ptrCmdPool);
911 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;
912
913 if (beginCommandBuffer) {
914 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
915 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
916 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
917 ptrCmdPool->commandPool, // commandPool
918 commandPoolResetFlags)); // flags
919
920 VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
921 VkCommandBufferInheritanceInfo inheritanceInfo {};
922 if (stateCache.secondaryCommandBuffer) {
923 commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
924 inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
925 }
926 const VkCommandBufferBeginInfo commandBufferBeginInfo {
927 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
928 nullptr, // pNext
929 commandBufferUsageFlags, // flags
930 mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
931 };
932
933 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
934 &commandBufferBeginInfo)); // pBeginInfo
935
936 #if (RENDER_PERF_ENABLED == 1)
937 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
938 if (validGpuQueries) {
939 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
940 PLUGIN_ASSERT(gpuQuery);
941
942 gpuQuery->NextQueryIndex();
943
944 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
945 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
946 }
947 #endif
948 #endif
949 }
950
951 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
952 {
953 BeginDebugMarker(cmdBuffer, debugNames.renderCommandListName, { 1.f, 1.f, 1.f, 1.f });
954 }
955 #endif
956
957 for (const auto& ref : rcRef) {
958 if (!stateCache.validCommandList) {
959 #if (RENDER_VALIDATION_ENABLED == 1)
960 PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
961 "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
962 debugNames.renderCommandListName.data());
963 #endif
964 break;
965 }
966
967 PLUGIN_ASSERT(ref.rc);
968 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
969 {
970 const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
971 BeginDebugMarker(cmdBuffer, COMMAND_NAMES[index], { 0.87f, 0.83f, 0.29f, 1.f });
972 }
973 #endif
974
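// translate each recorded high-level render command into the corresponding Vulkan call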
975 switch (ref.type) {
976 case RenderCommandType::BARRIER_POINT: {
977 if (!stateCache.secondaryCommandBuffer) {
978 const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
979 // handle all barriers before the render command that needs resource syncing
980 RenderCommand(
981 barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
982 }
983 break;
984 }
985 case RenderCommandType::DRAW: {
986 RenderCommand(
987 *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
988 break;
989 }
990 case RenderCommandType::DRAW_INDIRECT: {
991 RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
992 contextPoolMgr, stateCache);
993 break;
994 }
995 case RenderCommandType::DISPATCH: {
996 RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
997 contextPoolMgr, stateCache);
998 break;
999 }
1000 case RenderCommandType::DISPATCH_INDIRECT: {
1001 RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1002 contextPoolMgr, stateCache);
1003 break;
1004 }
1005 case RenderCommandType::BIND_PIPELINE: {
1006 RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1007 contextPoolMgr, stateCache);
1008 break;
1009 }
1010 case RenderCommandType::BEGIN_RENDER_PASS: {
1011 RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1012 contextPoolMgr, stateCache);
1013 break;
1014 }
1015 case RenderCommandType::NEXT_SUBPASS: {
1016 RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1017 contextPoolMgr, stateCache);
1018 break;
1019 }
1020 case RenderCommandType::END_RENDER_PASS: {
1021 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1022 contextPoolMgr, stateCache);
1023 break;
1024 }
1025 case RenderCommandType::BIND_VERTEX_BUFFERS: {
1026 RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1027 contextPoolMgr, stateCache);
1028 break;
1029 }
1030 case RenderCommandType::BIND_INDEX_BUFFER: {
1031 RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1032 contextPoolMgr, stateCache);
1033 break;
1034 }
1035 case RenderCommandType::COPY_BUFFER: {
1036 RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1037 contextPoolMgr, stateCache);
1038 break;
1039 }
1040 case RenderCommandType::COPY_BUFFER_IMAGE: {
1041 RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1042 contextPoolMgr, stateCache);
1043 break;
1044 }
1045 case RenderCommandType::COPY_IMAGE: {
1046 RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1047 contextPoolMgr, stateCache);
1048 break;
1049 }
1050 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
1051 RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1052 contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
1053 break;
1054 }
1055 case RenderCommandType::PUSH_CONSTANT: {
1056 RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1057 contextPoolMgr, stateCache);
1058 break;
1059 }
1060 case RenderCommandType::BLIT_IMAGE: {
1061 RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1062 contextPoolMgr, stateCache);
1063 break;
1064 }
1065 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
1066 RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
1067 nodeContextPsoMgr, contextPoolMgr, stateCache);
1068 break;
1069 }
1070 case RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES: {
1071 RenderCommand(*static_cast<RenderCommandCopyAccelerationStructureInstances*>(ref.rc), cmdBuffer,
1072 nodeContextPsoMgr, contextPoolMgr, stateCache);
1073 break;
1074 }
1075 case RenderCommandType::CLEAR_COLOR_IMAGE: {
1076 RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1077 contextPoolMgr, stateCache);
1078 break;
1079 }
1080 // dynamic states
1081 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
1082 RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1083 contextPoolMgr, stateCache);
1084 break;
1085 }
1086 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
1087 RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1088 contextPoolMgr, stateCache);
1089 break;
1090 }
1091 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
1092 RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1093 contextPoolMgr, stateCache);
1094 break;
1095 }
1096 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
1097 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1098 contextPoolMgr, stateCache);
1099 break;
1100 }
1101 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
1102 RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
1103 nodeContextPsoMgr, contextPoolMgr, stateCache);
1104 break;
1105 }
1106 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
1107 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1108 contextPoolMgr, stateCache);
1109 break;
1110 }
1111 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
1112 RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1113 contextPoolMgr, stateCache);
1114 break;
1115 }
1116 case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1117 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1118 nodeContextPsoMgr, contextPoolMgr, stateCache);
1119 break;
1120 }
1121 case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1122 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1123 nodeContextPsoMgr, contextPoolMgr, stateCache);
1124 break;
1125 }
1126 //
1127 case RenderCommandType::WRITE_TIMESTAMP: {
1128 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1129 contextPoolMgr, stateCache);
1130 break;
1131 }
1132 case RenderCommandType::UNDEFINED:
1133 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1134 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1135 case RenderCommandType::BEGIN_DEBUG_MARKER:
1136 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1137 RenderCommand(*static_cast<RenderCommandBeginDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1138 contextPoolMgr, stateCache);
1139 #endif
1140 break;
1141 case RenderCommandType::END_DEBUG_MARKER:
1142 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1143 RenderCommand(*static_cast<RenderCommandEndDebugMarker*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1144 contextPoolMgr, stateCache);
1145 #endif
1146 break;
1147 default: {
1148 PLUGIN_ASSERT(false && "non-valid render command");
1149 break;
1150 }
1151 }
1152 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1153 {
1154 EndDebugMarker(cmdBuffer);
1155 }
1156 #endif
1157 }
1158
1159 if ((!presentationData_.infos.empty())) {
1160 RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1161 }
1162
1163 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1164 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1165 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1166 }
1167 #endif
1168
1169 #if (RENDER_PERF_ENABLED == 1)
1170 // copy counters
1171 if (perfDataSet) {
1172 CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1173 }
1174 #endif
1175
1176 if (endCommandBuffer) {
1177 #if (RENDER_PERF_ENABLED == 1)
1178 if (perfDataSet) {
1179 perfDataSet->cpuTimer.End();
1180 }
1181 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1182 if (validGpuQueries) {
1183 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1184 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1185 }
1186 #endif
1187 CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1188 #endif
1189
1190 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1191
1192 if (mrclDesc.secondaryCommandBuffer) {
1193 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1194 } else {
1195 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1196 }
1197 }
1198 }
1199
1200 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1201 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1202 {
1203 const RenderHandle psoHandle = renderCmd.psoHandle;
1204 const auto pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1205
1206 stateCache.psoHandle = psoHandle;
1207
1208 VkPipeline pipeline { VK_NULL_HANDLE };
1209 VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
1210 if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1211 const auto* pso = static_cast<const ComputePipelineStateObjectVk*>(
1212 psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1213 if (pso) {
1214 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1215 pipeline = plat.pipeline;
1216 pipelineLayout = plat.pipelineLayout;
1217 }
1218 } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1219 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1220 if (stateCache.renderCommandBeginRenderPass) {
1221 uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1222 if (stateCache.pipelineDescSetHash != 0) {
1223 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1224 }
1225 const auto* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1226 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1227 stateCache.renderCommandBeginRenderPass->subpasses,
1228 stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1229 &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1230 if (pso) {
1231 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1232 pipeline = plat.pipeline;
1233 pipelineLayout = plat.pipelineLayout;
1234 }
1235 }
1236 }
1237
1238 // NOTE: render front-end expects pso binding after begin render pass
1239 // in some situations the render pass might change and therefore the pipeline changes
1240 // in some situations the render pass is the same and the rebinding is not needed
1241 const bool newPipeline = (pipeline != stateCache.pipeline);
1242 const bool valid = (pipeline != VK_NULL_HANDLE);
1243 if (valid && newPipeline) {
1244 stateCache.pipeline = pipeline;
1245 stateCache.pipelineLayout = pipelineLayout;
1246 stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1247 vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1248 pipelineBindPoint, // pipelineBindPoint
1249 pipeline); // pipeline
1250 #if (RENDER_PERF_ENABLED == 1)
1251 stateCache.perfCounters.bindPipelineCount++;
1252 #endif
1253 }
1254 }
1255
1256 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1257 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1258 {
1259 if (stateCache.validBindings) {
1260 if (renderCmd.indexCount) {
1261 vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1262 renderCmd.indexCount, // indexCount
1263 renderCmd.instanceCount, // instanceCount
1264 renderCmd.firstIndex, // firstIndex
1265 renderCmd.vertexOffset, // vertexOffset
1266 renderCmd.firstInstance); // firstInstance
1267 #if (RENDER_PERF_ENABLED == 1)
1268 stateCache.perfCounters.drawCount++;
1269 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1270 stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1271 #endif
1272 } else {
1273 vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1274 renderCmd.vertexCount, // vertexCount
1275 renderCmd.instanceCount, // instanceCount
1276 renderCmd.firstVertex, // firstVertex
1277 renderCmd.firstInstance); // firstInstance
1278 #if (RENDER_PERF_ENABLED == 1)
1279 stateCache.perfCounters.drawCount++;
1280 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1281 stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
1282 * renderCmd.instanceCount;
1283 #endif
1284 }
1285 }
1286 }
1287
1288 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1289 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1290 {
1291 if (stateCache.validBindings) {
1292 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1293 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1294 const VkBuffer buffer = plat.buffer;
1295 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1296 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1297 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1298 buffer, // buffer
1299 offset, // offset
1300 renderCmd.drawCount, // drawCount
1301 renderCmd.stride); // stride
1302 } else {
1303 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1304 buffer, // buffer
1305 offset, // offset
1306 renderCmd.drawCount, // drawCount
1307 renderCmd.stride); // stride
1308 }
1309 #if (RENDER_PERF_ENABLED == 1)
1310 stateCache.perfCounters.drawIndirectCount++;
1311 #endif
1312 }
1313 }
1314 }
1315
1316 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1317 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1318 {
1319 if (stateCache.validBindings) {
1320 vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1321 renderCmd.groupCountX, // groupCountX
1322 renderCmd.groupCountY, // groupCountY
1323 renderCmd.groupCountZ); // groupCountZ
1324 #if (RENDER_PERF_ENABLED == 1)
1325 stateCache.perfCounters.dispatchCount++;
1326 #endif
1327 }
1328 }
1329
1330 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1331 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1332 const StateCache& stateCache)
1333 {
1334 if (stateCache.validBindings) {
1335 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1336 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1337 const VkBuffer buffer = plat.buffer;
1338 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1339 vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1340 buffer, // buffer
1341 offset); // offset
1342 #if (RENDER_PERF_ENABLED == 1)
1343 stateCache.perfCounters.dispatchIndirectCount++;
1344 #endif
1345 }
1346 }
1347 }
1348
1349 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1350 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1351 StateCache& stateCache)
1352 {
1353 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1354 stateCache.renderCommandBeginRenderPass = &renderCmd;
1355
1356 auto& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1357 // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
1358 stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1359
1360 // early out for multi render command list render pass
1361 if (stateCache.secondaryCommandBuffer) {
1362 return; // early out
1363 }
1364 const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1365 (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1366 // invalidate the whole command list
1367 if (!validRpFbo) {
1368 stateCache.validCommandList = false;
1369 return; // early out
1370 }
1371
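// a render command list that continues an already-begun render pass only advances the subpass here;
// when the subpass was merged (CORE_SUBPASS_MERGE_BIT) no vkCmdNextSubpass is recorded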
1372 if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
1373 if (renderCmd.subpassStartIndex < renderCmd.subpasses.size()) {
1374 if ((renderCmd.subpasses[renderCmd.subpassStartIndex].subpassFlags &
1375 SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
1376 const auto subpassContents = static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1377 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1378 subpassContents); // contents
1379 }
1380 }
1381 return; // early out
1382 }
1383
1384 const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1385
1386 VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1387 bool hasClearValues = false;
1388 for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1389 const auto& ref = renderPassDesc.attachments[idx];
1390 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1391 ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1392 const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1393 VkClearValue cVal;
1394 if (RenderHandleUtil::IsDepthImage(handle)) {
1395 PLUGIN_STATIC_ASSERT(sizeof(cVal.depthStencil) == sizeof(ref.clearValue.depthStencil));
1396 cVal.depthStencil.depth = ref.clearValue.depthStencil.depth;
1397 cVal.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1398 } else {
1399 PLUGIN_STATIC_ASSERT(sizeof(cVal.color) == sizeof(ref.clearValue.color));
1400 CloneData(&cVal.color, sizeof(cVal.color), &ref.clearValue.color, sizeof(ref.clearValue.color));
1401 }
1402 clearValues[idx] = cVal;
1403 hasClearValues = true;
1404 }
1405 }
1406
1407 // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1408 // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1409 const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
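// when any attachment clears, clear values are passed for all attachments; entries for attachments
// that do not clear are ignored by the driver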
1410
1411 VkRect2D renderArea {
1412 { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1413 { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1414 };
1415 // render area needs to be inside frame buffer
1416 const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1417 renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1418 renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1419 renderArea.extent.width = Math::min(renderArea.extent.width,
1420 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1421 renderArea.extent.height = Math::min(renderArea.extent.height,
1422 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1423
1424 const VkRenderPassBeginInfo renderPassBeginInfo {
1425 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // sType
1426 nullptr, // pNext
1427 stateCache.lowLevelRenderPassData.renderPass, // renderPass
1428 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1429 renderArea, // renderArea
1430 clearValueCount, // clearValueCount
1431 clearValues, // pClearValues
1432 };
1433
1434 // NOTE: could be patched in render graph
1435 // const VkSubpassContents subpassContents = (VkSubpassContents)renderPassDesc.subpassContents;
1436 const VkSubpassContents subpassContents =
1437 stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
1438 vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1439 &renderPassBeginInfo, // pRenderPassBegin
1440 subpassContents); // contents
1441 #if (RENDER_PERF_ENABLED == 1)
1442 stateCache.perfCounters.renderPassCount++;
1443 #endif
1444 }
1445
1446 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1447 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1448 {
1449 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1450
1451 const auto subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1452 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1453 subpassContents); // contents
1454 }
1455
1456 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1457 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1458 {
1459 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1460
1461 // early out for multi render command list render pass
1462 if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
1463 return; // NOTE: only a subpass ended; the render pass itself continues
1464 }
1465
1466 stateCache.renderCommandBeginRenderPass = nullptr;
1467 stateCache.lowLevelRenderPassData = {};
1468
1469 if (!stateCache.secondaryCommandBuffer) {
1470 vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1471 }
1472 }
1473
1474 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1475 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1476 const StateCache& stateCache)
1477 {
1478 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1479 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1480
1481 const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1482
1483 VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1484 VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1485 const GpuBufferVk* gpuBuffer = nullptr;
1486 RenderHandle currBufferHandle;
1487 for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1488 const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
1489 // our importer usually uses the same GPU buffer for all vertex buffers in a single primitive
1490 // do not re-fetch the buffer if not needed
1491 if (currBufferHandle.id != currVb.bufferHandle.id) {
1492 currBufferHandle = currVb.bufferHandle;
1493 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1494 }
1495 if (gpuBuffer) {
1496 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1497 const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1498 vertexBuffers[idx] = plat.buffer;
1499 offsets[idx] = offset;
1500 }
1501 }
1502
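// NOTE: assumes every vertex buffer handle resolves to a valid GpuBufferVk; array slots for
// unresolved buffers are left unwritten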
1503 vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1504 0, // firstBinding
1505 vertexBufferCount, // bindingCount
1506 vertexBuffers, // pBuffers
1507 offsets); // pOffsets
1508 }
1509
1510 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1511 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1512 const StateCache& stateCache)
1513 {
1514 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1515 gpuBuffer) {
1516 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1517 const VkBuffer buffer = plat.buffer;
1518 const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1519 const auto indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1520
1521 vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1522 buffer, // buffer
1523 offset, // offset
1524 indexType); // indexType
1525 }
1526 }
1527
1528 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1529 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1530 {
1531 const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1532 const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1533 if (srcImagePtr && dstImagePtr) {
1534 const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
1535 const auto& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();
1536
1537 const ImageBlit& ib = renderCmd.imageBlit;
1538 const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1539 ? srcPlatImage.arrayLayers
1540 : ib.srcSubresource.layerCount;
1541 const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1542 ? dstPlatImage.arrayLayers
1543 : ib.dstSubresource.layerCount;
1544
1545 const VkImageSubresourceLayers srcSubresourceLayers {
1546 (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1547 ib.srcSubresource.mipLevel, // mipLevel
1548 ib.srcSubresource.baseArrayLayer, // baseArrayLayer
1549 srcLayerCount, // layerCount
1550 };
1551 const VkImageSubresourceLayers dstSubresourceLayers {
1552 (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1553 ib.dstSubresource.mipLevel, // mipLevel
1554 ib.dstSubresource.baseArrayLayer, // baseArrayLayer
1555 dstLayerCount, // layerCount
1556 };
1557
1558 const VkImageBlit imageBlit {
1559 srcSubresourceLayers, // srcSubresource
1560 { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1561 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1562 (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1563 dstSubresourceLayers, // dstSubresource
1564 { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1565 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1566 (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1567 };
1568
1569 vkCmdBlitImage(cmdBuf.commandBuffer, // commandBuffer
1570 srcPlatImage.image, // srcImage
1571 (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout,
1572 dstPlatImage.image, // dstImage
1573 (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1574 1, // regionCount
1575 &imageBlit, // pRegions
1576 (VkFilter)renderCmd.filter); // filter
1577 }
1578 }
1579
1580 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1581 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1582 {
1583 const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1584 const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1585
1586 if (srcGpuBuffer && dstGpuBuffer) {
1587 const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1588 const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1589 const VkBufferCopy bufferCopy {
1590 renderCmd.bufferCopy.srcOffset,
1591 renderCmd.bufferCopy.dstOffset,
1592 renderCmd.bufferCopy.size,
1593 };
1594
1595 if (bufferCopy.size > 0) {
1596 vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1597 srcBuffer, // srcBuffer
1598 dstBuffer, // dstBuffer
1599 1, // regionCount
1600 &bufferCopy); // pRegions
1601 }
1602 }
1603 }
1604
1605 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1606 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1607 const StateCache& stateCache)
1608 {
1609 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1610 PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1611 return;
1612 }
1613
1614 const GpuBufferVk* gpuBuffer = nullptr;
1615 const GpuImageVk* gpuImage = nullptr;
1616 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1617 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1618 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1619 } else {
1620 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1621 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1622 }
1623
1624 if (gpuBuffer && gpuImage) {
1625 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1626 const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1627 const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1628 const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1629 ? platImage.arrayLayers
1630 : subresourceLayer.layerCount;
1631 const VkImageSubresourceLayers imageSubresourceLayer {
1632 (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1633 subresourceLayer.mipLevel,
1634 subresourceLayer.baseArrayLayer,
1635 layerCount,
1636 };
1637 const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1638 // Math::min to force staying inside image
1639 const uint32_t mip = subresourceLayer.mipLevel;
1640 const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
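// NOTE: only width and height are shifted for the selected mip level; depth is taken as-is
// (assumes 2D or single-depth copies for mipped images)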
1641 const Size3D& imageOffset = bufferImageCopy.imageOffset;
1642 const VkExtent3D imageExtent = {
1643 Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1644 Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1645 Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1646 };
1647 const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1648 (imageOffset.depth < imageSize.depth);
1649 const VkBufferImageCopy bufferImageCopyVk {
1650 bufferImageCopy.bufferOffset,
1651 bufferImageCopy.bufferRowLength,
1652 bufferImageCopy.bufferImageHeight,
1653 imageSubresourceLayer,
1654 { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1655 static_cast<int32_t>(imageOffset.depth) },
1656 imageExtent,
1657 };
1658
1659 const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1660 const VkImage image = (gpuImage->GetPlatformData()).image;
1661
1662 if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1663 vkCmdCopyBufferToImage(cmdBuf.commandBuffer, // commandBuffer
1664 buffer, // srcBuffer
1665 image, // dstImage
1666 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1667 1, // regionCount
1668 &bufferImageCopyVk); // pRegions
1669 } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1670 vkCmdCopyImageToBuffer(cmdBuf.commandBuffer, // commandBuffer
1671 image, // srcImage
1672 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1673 buffer, // dstBuffer
1674 1, // regionCount
1675 &bufferImageCopyVk); // pRegions
1676 }
1677 }
1678 }
1679
1680 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1681 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1682 {
1683 const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1684 const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1685 if (srcGpuImage && dstGpuImage) {
1686 const ImageCopy& copy = renderCmd.imageCopy;
1687 const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1688 const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1689
1690 const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1691 const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1692 const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1693 ? srcPlatImage.arrayLayers
1694 : srcSubresourceLayer.layerCount;
1695 const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1696 ? dstPlatImage.arrayLayers
1697 : dstSubresourceLayer.layerCount;
1698
1699 const VkImageSubresourceLayers srcImageSubresourceLayer {
1700 (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1701 srcSubresourceLayer.mipLevel,
1702 srcSubresourceLayer.baseArrayLayer,
1703 srcLayerCount,
1704 };
1705 const VkImageSubresourceLayers dstImageSubresourceLayer {
1706 (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1707 dstSubresourceLayer.mipLevel,
1708 dstSubresourceLayer.baseArrayLayer,
1709 dstLayerCount,
1710 };
1711
1712 const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1713 const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1714
1715 VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1716 ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1717 ext.height =
1718 Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1719 ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1720
1721 const VkImageCopy imageCopyVk {
1722 srcImageSubresourceLayer, // srcSubresource
1723 { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1724 dstImageSubresourceLayer, // dstSubresource
1725 { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1726 ext, // extent
1727 };
1728 vkCmdCopyImage(cmdBuf.commandBuffer, // commandBuffer
1729 srcPlatImage.image, // srcImage
1730 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1731 dstPlatImage.image, // dstImage
1732 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1733 1, // regionCount
1734 &imageCopyVk); // pRegions
1735 }
1736 }
1737
1738 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1739 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1740 const RenderBarrierList& rbl)
1741 {
1742 if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1743 return;
1744 }
1745
1746 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1747 rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1748 PLUGIN_ASSERT(barrierPointBarriers);
1749 if (!barrierPointBarriers) {
1750 return;
1751 }
1752 constexpr uint32_t maxBarrierCount { 8 };
1753 VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1754 VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1755 VkMemoryBarrier memoryBarriers[maxBarrierCount];
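// barriers are accumulated into the fixed-size arrays above and flushed with a single
// vkCmdPipelineBarrier whenever any array fills up or the barrier list ends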
1756
1757 // generally there is only a single barrier list per barrier point
1758 // in situations with batched render passes there can be many
1759 // NOTE: all barrier lists could be combined into a single vk command if needed
1760 // NOTE: memory and pipeline barriers should be allowed on the front-end side
1761 const auto barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
1762 const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1763 #if (RENDER_VALIDATION_ENABLED == 1)
1764 uint32_t fullBarrierCount = 0u;
1765 #endif
1766 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1767 if (nextBarrierList == nullptr) { // cannot be null, just a safety
1768 PLUGIN_ASSERT(false);
1769 return;
1770 }
1771 const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1772 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1773 const auto barrierCount = (uint32_t)barrierListRef.count;
1774
1775 uint32_t bufferBarrierIdx = 0;
1776 uint32_t imageBarrierIdx = 0;
1777 uint32_t memoryBarrierIdx = 0;
1778
1779 VkPipelineStageFlags srcPipelineStageMask { 0 };
1780 VkPipelineStageFlags dstPipelineStageMask { 0 };
1781 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1782
1783 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1784 const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1785
1786 uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1787 uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1788 if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1789 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1790 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1791 }
1792
1793 const RenderHandle resourceHandle = ref.resourceHandle;
1794 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1795
1796 PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1797 (handleType == RenderHandleType::GPU_IMAGE));
1798
1799 const auto srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1800 const auto dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1801
1802 srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1803 dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1804
1805 // NOTE: zero size buffer barriers allowed ATM
1806 if (handleType == RenderHandleType::GPU_BUFFER) {
1807 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1808 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1809 // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1810 const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1811 const VkDeviceSize size =
1812 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1813 (VkDeviceSize)ref.dst.optionalByteSize);
1814 if (platBuffer.buffer) {
1815 bufferMemoryBarriers[bufferBarrierIdx++] = {
1816 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1817 nullptr, // pNext
1818 srcAccessMask, // srcAccessMask
1819 dstAccessMask, // dstAccessMask
1820 srcQueueFamilyIndex, // srcQueueFamilyIndex
1821 dstQueueFamilyIndex, // dstQueueFamilyIndex
1822 platBuffer.buffer, // buffer
1823 offset, // offset
1824 size, // size
1825 };
1826 }
1827 }
1828 } else if (handleType == RenderHandleType::GPU_IMAGE) {
1829 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1830 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1831
1832 const auto srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1833 const auto dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1834
1835 const VkImageAspectFlags imageAspectFlags =
1836 (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1837 ? platImage.aspectFlags
1838 : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1839
1840 const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1841 PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1842 ? VK_REMAINING_MIP_LEVELS
1843 : ref.src.optionalImageSubresourceRange.levelCount;
1844
1845 const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1846 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1847 ? VK_REMAINING_ARRAY_LAYERS
1848 : ref.src.optionalImageSubresourceRange.layerCount;
1849
1850 const VkImageSubresourceRange imageSubresourceRange {
1851 imageAspectFlags, // aspectMask
1852 ref.src.optionalImageSubresourceRange.baseMipLevel, // baseMipLevel
1853 levelCount, // levelCount
1854 ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1855 layerCount, // layerCount
1856 };
1857
1858 if (platImage.image) {
1859 imageMemoryBarriers[imageBarrierIdx++] = {
1860 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1861 nullptr, // pNext
1862 srcAccessMask, // srcAccessMask
1863 dstAccessMask, // dstAccessMask
1864 srcImageLayout, // oldLayout
1865 dstImageLayout, // newLayout
1866 srcQueueFamilyIndex, // srcQueueFamilyIndex
1867 dstQueueFamilyIndex, // dstQueueFamilyIndex
1868 platImage.image, // image
1869 imageSubresourceRange, // subresourceRange
1870 };
1871 }
1872 }
1873 } else {
1874 memoryBarriers[memoryBarrierIdx++] = {
1875 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1876 nullptr, // pNext
1877 srcAccessMask, // srcAccessMask
1878 dstAccessMask, // dstAccessMask
1879 };
1880 }
1881
1882 const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1883 const bool resetBarriers = ((bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1884 (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1)));
1885
1886 if (hasBarriers && resetBarriers) {
1887 #if (RENDER_VALIDATION_ENABLED == 1)
1888 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1889 #endif
1890 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1891 srcPipelineStageMask, // srcStageMask
1892 dstPipelineStageMask, // dstStageMask
1893 dependencyFlags, // dependencyFlags
1894 memoryBarrierIdx, // memoryBarrierCount
1895 memoryBarriers, // pMemoryBarriers
1896 bufferBarrierIdx, // bufferMemoryBarrierCount
1897 bufferMemoryBarriers, // pBufferMemoryBarriers
1898 imageBarrierIdx, // imageMemoryBarrierCount
1899 imageMemoryBarriers); // pImageMemoryBarriers
1900
1901 bufferBarrierIdx = 0;
1902 imageBarrierIdx = 0;
1903 memoryBarrierIdx = 0;
1904 }
1905 }
1906 }
1907 #if (RENDER_VALIDATION_ENABLED == 1)
1908 if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1909 PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1910 "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1911 barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1912 }
1913 #endif
1914 }
1915
1916 namespace {
1917 struct DescriptorSetUpdateDataStruct {
1918 uint32_t accelIndex { 0U };
1919 uint32_t bufferIndex { 0U };
1920 uint32_t imageIndex { 0U };
1921 uint32_t samplerIndex { 0U };
1922 uint32_t writeBindIdx { 0U };
1923 };
1924
1925 void UpdateSingleDescriptorSet(const GpuResourceManager& gpuResourceMgr, RenderBackendVk::StateCache* stateCache,
1926 const LowLevelDescriptorSetVk* descriptorSet, const DescriptorSetLayoutBindingResourcesHandler& bindingResources,
1927 LowLevelContextDescriptorWriteDataVk& wd, DescriptorSetUpdateDataStruct& dsud)
1928 {
1929 // actual vulkan descriptor set update
1930 if (descriptorSet && descriptorSet->descriptorSet) {
1931 if ((uint32_t)bindingResources.bindings.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1932 PLUGIN_ASSERT(false);
1933 return;
1934 }
1935 const auto& buffers = bindingResources.buffers;
1936 const auto& images = bindingResources.images;
1937 const auto& samplers = bindingResources.samplers;
1938 for (const auto& refBuf : buffers) {
1939 const auto& ref = refBuf.desc;
1940 const uint32_t descriptorCount = ref.binding.descriptorCount;
1941 // skip array bindings that are bound starting from the first index; they also have a descriptorCount of 0
1942 if (descriptorCount == 0) {
1943 continue;
1944 }
1945 const uint32_t arrayOffset = ref.arrayOffset;
1946 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1947 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1948 #if (RENDER_VULKAN_RT_ENABLED == 1)
1949 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1950 // first is the ref, starting from 1 we use array offsets
1951 const BindableBuffer& bRes =
1952 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
1953 if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1954 const GpuAccelerationStructurePlatformDataVk& platAccel =
1955 resPtr->GetPlatformDataAccelerationStructure();
1956 wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1957 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1958 nullptr, // pNext
1959 descriptorCount, // accelerationStructureCount
1960 &platAccel.accelerationStructure, // pAccelerationStructures
1961 };
1962 }
1963 }
1964 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1965 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1966 &wd.descriptorAccelInfos[dsud.accelIndex], // pNext
1967 descriptorSet->descriptorSet, // dstSet
1968 ref.binding.binding, // dstBinding
1969 0, // dstArrayElement
1970 descriptorCount, // descriptorCount
1971 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1972 nullptr, // pImageInfo
1973 nullptr, // pBufferInfo
1974 nullptr, // pTexelBufferView
1975 };
1976 dsud.accelIndex += descriptorCount;
1977 #endif
1978 } else {
1979 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1980 // first is the ref, starting from 1 we use array offsets
1981 const BindableBuffer& bRes =
1982 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].desc.resource;
1983 const auto optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1984 if (const GpuBufferVk* resPtr = gpuResourceMgr.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1985 const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
1986 // takes into account dynamic ring buffers with mapping
1987 const auto bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1988 const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1989 const VkDeviceSize bufferRange =
1990 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1991 (VkDeviceSize)bRes.byteSize);
1992 wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
1993 platBuffer.buffer, // buffer
1994 byteOffset, // offset
1995 bufferRange, // range
1996 };
1997 }
1998 }
1999 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2000 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2001 nullptr, // pNext
2002 descriptorSet->descriptorSet, // dstSet
2003 ref.binding.binding, // dstBinding
2004 0, // dstArrayElement
2005 descriptorCount, // descriptorCount
2006 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2007 nullptr, // pImageInfo
2008 &wd.descriptorBufferInfos[dsud.bufferIndex], // pBufferInfo
2009 nullptr, // pTexelBufferView
2010 };
2011 dsud.bufferIndex += descriptorCount;
2012 }
2013 }
2014 for (const auto& refImg : images) {
2015 const auto& ref = refImg.desc;
2016 const uint32_t descriptorCount = ref.binding.descriptorCount;
2017 // skip array bindings that are bound starting from the first index; they also have a descriptorCount of 0
2018 if (descriptorCount == 0) {
2019 continue;
2020 }
2021 const auto descriptorType = (VkDescriptorType)ref.binding.descriptorType;
2022 const uint32_t arrayOffset = ref.arrayOffset;
2023 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
2024 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2025 // first is the ref, starting from 1 we use array offsets
2026 const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].desc.resource;
2027 if (const GpuImageVk* resPtr = gpuResourceMgr.GetImage<GpuImageVk>(bRes.handle); resPtr) {
2028 VkSampler sampler = VK_NULL_HANDLE;
2029 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2030 const GpuSamplerVk* samplerPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
2031 if (samplerPtr) {
2032 sampler = samplerPtr->GetPlatformData().sampler;
2033 }
2034 }
2035 const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
2036 const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
2037 VkImageView imageView = platImage.imageView;
2038 if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2039 (bRes.layer < platImageViews.layerImageViews.size())) {
2040 imageView = platImageViews.layerImageViews[bRes.layer];
2041 } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
2042 if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2043 (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
2044 imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
2045 } else if (bRes.mip < platImageViews.mipImageViews.size()) {
2046 imageView = platImageViews.mipImageViews[bRes.mip];
2047 }
2048 }
2049 wd.descriptorImageInfos[dsud.imageIndex + idx] = {
2050 sampler, // sampler
2051 imageView, // imageView
2052 (VkImageLayout)bRes.imageLayout, // imageLayout
2053 };
2054 }
2055 }
2056 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2057 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2058 nullptr, // pNext
2059 descriptorSet->descriptorSet, // dstSet
2060 ref.binding.binding, // dstBinding
2061 0, // dstArrayElement
2062 descriptorCount, // descriptorCount
2063 descriptorType, // descriptorType
2064 &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
2065 nullptr, // pBufferInfo
2066 nullptr, // pTexelBufferView
2067 };
2068 dsud.imageIndex += descriptorCount;
2069 }
2070 for (const auto& refSam : samplers) {
2071 const auto& ref = refSam.desc;
2072 const uint32_t descriptorCount = ref.binding.descriptorCount;
2073 // skip array bindings that are bound starting from the first index; they also have a descriptorCount of 0
2074 if (descriptorCount == 0) {
2075 continue;
2076 }
2077 const uint32_t arrayOffset = ref.arrayOffset;
2078 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
2079 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2080 // first is the ref, starting from 1 we use array offsets
2081 const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].desc.resource;
2082 if (const GpuSamplerVk* resPtr = gpuResourceMgr.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
2083 const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
2084 wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
2085 platSampler.sampler, // sampler
2086 VK_NULL_HANDLE, // imageView
2087 VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
2088 };
2089 }
2090 }
2091 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2092 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2093 nullptr, // pNext
2094 descriptorSet->descriptorSet, // dstSet
2095 ref.binding.binding, // dstBinding
2096 0, // dstArrayElement
2097 descriptorCount, // descriptorCount
2098 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2099 &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
2100 nullptr, // pBufferInfo
2101 nullptr, // pTexelBufferView
2102 };
2103 dsud.samplerIndex += descriptorCount;
2104 }
2105 #if (RENDER_PERF_ENABLED == 1)
2106 // count the actually updated descriptor sets, not the API calls
2107 if (stateCache) {
2108 stateCache->perfCounters.updateDescriptorSetCount++;
2109 }
2110 #endif
2111 }
2112 }
2113 } // namespace
2114
2115 void RenderBackendVk::UpdateGlobalDescriptorSets()
2116 {
2117 RENDER_CPU_PERF_SCOPE("UpdateGlobalDescriptorSets", "");
2118
2119 auto& dsMgr = (DescriptorSetManagerVk&)device_.GetDescriptorSetManager();
2120 LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
2121 const auto& allDescSets = dsMgr.GetUpdateDescriptorSetHandles();
2122 const uint32_t upDescriptorSetCount =
2123 static_cast<uint32_t>(Math::min(allDescSets.size(), wd.writeDescriptorSets.size()));
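// NOTE: the handle count is clamped to the capacity of the pre-allocated write data arrays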
2124 DescriptorSetUpdateDataStruct dsud;
2125
2126 for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
2127 if (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET) {
2128 continue;
2129 }
2130 const RenderHandle descHandle = allDescSets[descIdx];
2131 // first update gpu descriptor indices
2132 if (!dsMgr.UpdateDescriptorSetGpuHandle(descHandle)) {
2133 continue; // continue if not dirty
2134 }
2135
2136 const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
2137 const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);
2138
2139 UpdateSingleDescriptorSet(gpuResourceMgr_, nullptr, descriptorSet, bindingResources, wd, dsud);
2140
2141 // NOTE: should update perf counters
2142 }
2143
2144 // submit all collected descriptor writes with a single vkUpdateDescriptorSets call when there is something to write
2145 if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
2146 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2147 vkUpdateDescriptorSets(device, // device
2148 dsud.writeBindIdx, // descriptorWriteCount
2149 wd.writeDescriptorSets.data(), // pDescriptorWrites
2150 0, // descriptorCopyCount
2151 nullptr); // pDescriptorCopies
2152 }
2153 }
2154
2155 void RenderBackendVk::UpdateCommandListDescriptorSets(
2156 const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
2157 {
2158 auto& dsMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
2159
2160 const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
2161 const auto upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
2162 LowLevelContextDescriptorWriteDataVk& wd = dsMgr.GetLowLevelDescriptorWriteData();
2163 DescriptorSetUpdateDataStruct dsud;
2164 for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
2165 if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
2166 (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
2167 continue;
2168 }
2169
2170 const RenderHandle descHandle = allDescSets[descIdx];
2171 // first update gpu descriptor indices
2172 if (!dsMgr.UpdateDescriptorSetGpuHandle(descHandle)) {
2173 continue; // continue if not dirty
2174 }
2175
2176 const LowLevelDescriptorSetVk* descriptorSet = dsMgr.GetDescriptorSet(descHandle);
2177 const DescriptorSetLayoutBindingResourcesHandler bindingResources = dsMgr.GetCpuDescriptorSetData(descHandle);
2178
2179 UpdateSingleDescriptorSet(gpuResourceMgr_, &stateCache, descriptorSet, bindingResources, wd, dsud);
2180 }
2181 // submit all collected descriptor writes with a single vkUpdateDescriptorSets call when there is something to write
2182 if ((upDescriptorSetCount > 0U) && (dsud.writeBindIdx > 0U)) {
2183 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2184 vkUpdateDescriptorSets(device, // device
2185 dsud.writeBindIdx, // descriptorWriteCount
2186 wd.writeDescriptorSets.data(), // pDescriptorWrites
2187 0, // descriptorCopyCount
2188 nullptr); // pDescriptorCopies
2189 }
2190 }
2191
2192 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
2193 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2194 StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
2195 {
2196 const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
2197
2198 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2199 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
2200 const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
2201 ? VK_PIPELINE_BIND_POINT_COMPUTE
2202 : VK_PIPELINE_BIND_POINT_GRAPHICS;
2203 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2204
2205 bool valid = (pipelineLayout != VK_NULL_HANDLE);
2206 const uint32_t firstSet = renderCmd.firstSet;
2207 const uint32_t setCount = renderCmd.setCount;
2208 if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
2209 uint32_t dynamicOffsetDescriptorSetIndices = 0;
2210 uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
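// pipelineDescSetHash packs 16 bits of immutable sampler state per descriptor set index; if binding
// changes the hash, the PSO is re-created and the sets rebound (see the else branch further below)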
2211
2212 VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2213 const uint32_t firstPlusCount = firstSet + setCount;
2214 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2215 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2216 if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
2217 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
2218 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
2219
2220 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
2221 if (descriptorSet && descriptorSet->descriptorSet) {
2222 descriptorSets[idx] = descriptorSet->descriptorSet;
2223 // update, copy to state cache
2224 PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
2225 stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
2226 const uint32_t currShift = (idx * 16u);
2227 const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
2228 uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
2229 stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
2230 } else {
2231 valid = false;
2232 }
2233 }
2234 }
2235
2236 uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
2237 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2238 uint32_t dynamicOffsetIdx = 0;
2239 // NOTE: optimize
2240 // this code has safety checks so that offsets are not updated for non-dynamic sets
2241 // they could be enabled only in validation builds
2242 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2243 if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
2244 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2245 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
2246 const auto dodResCount = static_cast<uint32_t>(dod.resources.size());
2247 const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
2248 for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
2249 uint32_t byteOffset = 0U;
2250 if (descriptorSetDynamicOffsets.dynamicOffsets &&
2251 (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount)) {
2252 byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
2253 }
2254 dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
2255 }
2256 }
2257 }
2258
2259 stateCache.validBindings = valid;
2260 if (stateCache.validBindings) {
2261 if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
2262 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
2263 pipelineBindPoint, // pipelineBindPoint
2264 pipelineLayout, // layout
2265 firstSet, // firstSet
2266 setCount, // descriptorSetCount
2267 &descriptorSets[firstSet], // pDescriptorSets
2268 dynamicOffsetIdx, // dynamicOffsetCount
2269 dynamicOffsets); // pDynamicOffsets
2270 #if (RENDER_PERF_ENABLED == 1)
2271 stateCache.perfCounters.bindDescriptorSetCount++;
2272 #endif
2273 } else {
2274 // possible pso re-creation and bind of these sets to the new pso
2275 PLUGIN_LOG_E("vkCmdBindDescriptorSets hit des cache");
2276 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
2277 (PipelineBindPoint)pipelineBindPoint };
2278 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
2279 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
2280 }
2281 } else {
2282 PLUGIN_LOG_E("stateCache.validBindings invalid");
2283 }
2284 }
2285 }
2286
2287 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2288 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2289 {
2290 PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
2291 PLUGIN_ASSERT(renderCmd.data);
2292
2293 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2294 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2295
2296 const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0));
2297 PLUGIN_ASSERT(valid);
2298
2299 if (valid) {
2300 const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
2301 vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
2302 pipelineLayout, // layout
2303 shaderStageFlags, // stageFlags
2304 0, // offset
2305 renderCmd.pushConstant.byteSize, // size
2306 static_cast<void*>(renderCmd.data)); // pValues
2307 }
2308 }
2309
2310 namespace {
2311 struct DeviceAddressOffset {
2312 uint64_t address;
2313 uint64_t offset;
2314 };
2315
2316 inline constexpr VkDeviceOrHostAddressConstKHR GetValidDeviceAddress(const DeviceAddressOffset& bo, bool& valid)
2317 {
2318 valid = valid && (bo.address != 0);
2319 return { bo.address + bo.offset };
2320 }
2321 } // namespace
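// NOTE: validAddresses is threaded through successive GetValidDeviceAddress calls below; a single null
// device address marks the whole acceleration structure build invalid and skips the build command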
2322
2323 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
2324 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2325 const StateCache& stateCache)
2326 {
2327 #if (RENDER_VULKAN_RT_ENABLED == 1)
2328 const AsBuildGeometryData& geometry = renderCmd.geometry;
2329
2330 const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(geometry.dstAccelerationStructure);
2331 const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(geometry.scratchBuffer.handle);
2332 if ((!dst) || (!scratchBuffer)) {
2333 return; // early out
2334 }
2335
2336 const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
2337 const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
2338
2339 bool validAddresses = true;
2340
2341 const size_t arraySize =
2342 renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
2343 vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
2344 vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
2345
2346 uint32_t arrayIndex = 0;
2347 const bool isTopLevel =
2348 (geometry.info.type == AccelerationStructureType::CORE_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL);
2349 for (const auto& ref : renderCmd.trianglesView) {
2350 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2351 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2352 nullptr, // pNext
2353 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
2354 {}, // geometry;
2355 VkGeometryFlagsKHR(ref.info.geometryFlags), // flags
2356 };
2357 uint32_t primitiveCount = 0;
2358 const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.vertexData.handle);
2359 const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.indexData.handle);
2360 if (vb && ib) {
2361 const VkDeviceOrHostAddressConstKHR vertexAddress =
2362 GetValidDeviceAddress({ vb->GetPlatformData().deviceAddress, ref.vertexData.offset }, validAddresses);
2363 const VkDeviceOrHostAddressConstKHR indexAddress =
2364 GetValidDeviceAddress({ ib->GetPlatformData().deviceAddress, ref.indexData.offset }, validAddresses);
2365
2366 VkDeviceOrHostAddressConstKHR transformAddress {};
2367 if (RenderHandleUtil::IsValid(ref.transformData.handle)) {
2368 if (const GpuBufferVk* tr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.transformData.handle);
2369 tr) {
2370 transformAddress = GetValidDeviceAddress(
2371 { tr->GetPlatformData().deviceAddress, ref.transformData.offset }, validAddresses);
2372 }
2373 }
2374 primitiveCount = ref.info.indexCount / 3u; // triangles
2375
2376 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
2377 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
2378 nullptr, // pNext
2379 VkFormat(ref.info.vertexFormat), // vertexFormat
2380 vertexAddress, // vertexData
2381 VkDeviceSize(ref.info.vertexStride), // vertexStride
2382 ref.info.maxVertex, // maxVertex
2383 VkIndexType(ref.info.indexType), // indexType
2384 indexAddress, // indexData
2385 transformAddress, // transformData
2386 };
2387 }
2388 buildRangeInfos[arrayIndex] = {
2389 primitiveCount, // primitiveCount
2390 0u, // primitiveOffset
2391 0u, // firstVertex
2392 0u, // transformOffset
2393 };
2394 arrayIndex++;
2395 }
2396 for (const auto& ref : renderCmd.aabbsView) {
2397 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2398 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2399 nullptr, // pNext
2400 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
2401 {}, // geometry
2402 VkGeometryFlagsKHR(ref.info.geometryFlags), // flags
2403 };
2404 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2405 if (const GpuBufferVk* ptr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.data.handle); ptr) {
2406 deviceAddress =
2407 GetValidDeviceAddress({ ptr->GetPlatformData().deviceAddress, ref.data.offset }, validAddresses);
2408 }
2409 geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
2410 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
2411 nullptr, // pNext
2412 deviceAddress, // data
2413 ref.info.stride, // stride
2414 };
2415
2416 buildRangeInfos[arrayIndex] = {
2417 1u, // primitiveCount
2418 0u, // primitiveOffset
2419 0u, // firstVertex
2420 0u, // transformOffset
2421 };
2422 arrayIndex++;
2423 }
2424 for (const auto& ref : renderCmd.instancesView) {
2425 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2426 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2427 nullptr, // pNext
2428 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
2429 {}, // geometry;
2430 VkGeometryFlagsKHR(ref.info.geometryFlags), // flags
2431 };
2432 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2433 if (const GpuBufferVk* ptr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(ref.data.handle); ptr) {
2435 deviceAddress =
2436     GetValidDeviceAddress({ ptr->GetPlatformData().deviceAddress, ref.data.offset }, validAddresses);
2437 }
2438 geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2439 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2440 nullptr, // pNext
2441 ref.info.arrayOfPointers, // arrayOfPointers
2442 deviceAddress, // data
2443 };
2444 buildRangeInfos[arrayIndex] = {
2445 ref.info.primitiveCount, // primitiveCount
2446 0u, // primitiveOffset
2447 0u, // firstVertex
2448 0u, // transformOffset
2449 };
2450 arrayIndex++;
2451 }
2452
2453 const VkDeviceOrHostAddressKHR scratchData = { GetValidDeviceAddress(
2454 { scratchBuffer->GetPlatformData().deviceAddress, geometry.scratchBuffer.offset }, validAddresses)
2455 .deviceAddress };
2456
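// NOTE: a top-level build uses a single instances geometry, while a bottom-level build consumes all collected triangle/AABB geometries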
2457 const uint32_t geometryCount = isTopLevel ? 1U : arrayIndex;
2458 const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2459 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2460 nullptr, // pNext
2461 VkAccelerationStructureTypeKHR(geometry.info.type), // type
2462 VkBuildAccelerationStructureFlagsKHR(geometry.info.flags), // flags
2463 VkBuildAccelerationStructureModeKHR(geometry.info.mode), // mode
2464 VK_NULL_HANDLE, // srcAccelerationStructure
2465 dstAs, // dstAccelerationStructure
2466 geometryCount, // geometryCount
2467 geometryData.data(), // pGeometries
2468 nullptr, // ppGeometries
2469 scratchData, // scratchData
2470 };
2471
2472 vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2473 for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2474 buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2475 }
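// vkCmdBuildAccelerationStructuresKHR takes one range pointer per build info; with infoCount == 1 only the first pointer is read and the contiguous range array covers all geometries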
2476 const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2477 if (validAddresses && extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2478 extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2479 1U, // infoCount
2480 &buildGeometryInfo, // pInfos
2481 buildRangeInfosPtr.data()); // ppBuildRangeInfos
2482 }
2483 #if (RENDER_VALIDATION_ENABLED == 1)
2484 if (!validAddresses) {
2485 const string tmpStr = "RenderBackendVk::RenderCommandBuildAccelerationStructure_address";
2486 PLUGIN_LOG_ONCE_W(
2487 tmpStr, "RENDER_VALIDATION: Invalid device addresses in RenderCommandBuildAccelerationStructure");
2488 }
2489 #endif
2490 #endif
2491 }
2492
2493 void RenderBackendVk::RenderCommand(const RenderCommandCopyAccelerationStructureInstances& renderCmd,
2494 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2495 const StateCache& stateCache)
2496 {
2497 #if (RENDER_VULKAN_RT_ENABLED == 1)
2498 // NOTE: at the moment the instance data is copied on the CPU through a mapped destination buffer; no GPU copy or barriers are recorded here
2499 const RenderHandle dstHandle = renderCmd.destination.handle;
2500 const GpuBufferDesc dstBufferDesc = gpuResourceMgr_.GetBufferDescriptor(dstHandle);
2501 if (uint8_t* dstDataBegin = static_cast<uint8_t*>(gpuResourceMgr_.MapBuffer(dstHandle)); dstDataBegin) {
2502 const uint8_t* dstDataEnd = dstDataBegin + dstBufferDesc.byteSize;
2503 // add render command offset
2504 dstDataBegin += size_t(renderCmd.destination.offset);
2505 // loop and copy all instances
2506 bool validAddresses = true;
2507 for (uint32_t idx = 0; idx < renderCmd.instancesView.size(); ++idx) {
2508 const auto& ref = renderCmd.instancesView[idx];
2509 uint64_t accelDeviceAddress = 0;
2510 if (const GpuBufferVk* ptr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(ref.accelerationStructure); ptr) {
2511 accelDeviceAddress = GetValidDeviceAddress(
2512 { ptr->GetPlatformDataAccelerationStructure().deviceAddress, 0 }, validAddresses)
2513 .deviceAddress;
2514 }
2515 const auto& tr = ref.transform;
2516 // convert 4x3 column to 3x4 row
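// VkTransformMatrixKHR is a row-major 3x4 matrix, so each row is assembled from the x/y/z components of the column vectors in tr[]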
2517 VkAccelerationStructureInstanceKHR instance {
2518 { { { tr[0].x, tr[1].x, tr[2].x, tr[3].x }, { tr[0].y, tr[1].y, tr[2].y, tr[3].y },
2519 { tr[0].z, tr[1].z, tr[2].z, tr[3].z } } }, // transform
2520 ref.instanceCustomIndex, // instanceCustomIndex : 24
2521 ref.mask, // mask : 8
2522 0U, // instanceShaderBindingTableRecordOffset : 24
2523 VkGeometryInstanceFlagsKHR(ref.flags), // flags : 8
2524 accelDeviceAddress, // accelerationStructureReference
2525 };
2526 constexpr size_t byteSize = sizeof(VkAccelerationStructureInstanceKHR);
2527 uint8_t* dstData = dstDataBegin + byteSize * idx;
2528 CloneData(dstData, size_t(dstDataEnd - dstData), &instance, byteSize);
2529 }
2530 gpuResourceMgr_.UnmapBuffer(dstHandle);
2531
2532 #if (RENDER_VALIDATION_ENABLED == 1)
2533 if (!validAddresses) {
2534 const string tmpStr = "RenderBackendVk::RenderCommandCopyAccelerationStructureInstances_address";
2535 PLUGIN_LOG_ONCE_W(tmpStr,
2536 "RENDER_VALIDATION: Invalid device addresses in RenderCommandCopyAccelerationStructureInstances");
2537 }
2538 #endif
2539 }
2540 #endif
2541 }
2542
2543 void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
2544 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2545 const StateCache& stateCache)
2546 {
2547 const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
2548 // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
2549 const auto imageLayout = (VkImageLayout)renderCmd.imageLayout;
2550 PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
2551 if (imagePtr) {
2552 const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
2553 if (platImage.image) {
2554 VkClearColorValue clearColor;
2555 PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
2556 CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));
2557
2558 // NOTE: a temporary vector is allocated because the range count has no fixed upper limit
2559 vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
2560 for (size_t idx = 0; idx < ranges.size(); ++idx) {
2561 const auto& inputRef = renderCmd.ranges[idx];
2562 ranges[idx] = {
2563 (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
2564 inputRef.baseMipLevel, // baseMipLevel
2565 inputRef.levelCount, // levelCount
2566 inputRef.baseArrayLayer, // baseArrayLayer
2567 inputRef.layerCount, // layerCount
2568 };
2569 }
2570
2571 vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
2572 platImage.image, // image
2573 imageLayout, // imageLayout
2574 &clearColor, // pColor
2575 static_cast<uint32_t>(ranges.size()), // rangeCount
2576 ranges.data()); // pRanges
2577 }
2578 }
2579 }
2580
2581 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
2582 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2583 const StateCache& stateCache)
2584 {
2585 const ViewportDesc& vd = renderCmd.viewportDesc;
2586
2587 VkViewport vp {
2588 vd.x, // x
2589 vd.y, // y
2590 vd.width, // width
2591 vd.height, // height
2592 vd.minDepth, // minDepth
2593 vd.maxDepth, // maxDepth
2594 };
2595 // handle viewport for surface transform
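// with swapchain pre-rotation the viewport is specified in the rotated framebuffer space, so the rectangle is remapped for 90/180/270 degree surface transforms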
2596 const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
2597 if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
2598 if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) ==
2599 CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
2600 vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.height - vd.y;
2601 vp.y = vd.x;
2602 vp.width = vd.height;
2603 vp.height = vd.width;
2604 } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) ==
2605 CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
2606 vp.x = static_cast<float>(rpd.framebufferSize.width) - vd.width - vd.x;
2607 vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.height - vd.y;
2608 } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) ==
2609 CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
2610 vp.x = vd.y;
2611 vp.y = static_cast<float>(rpd.framebufferSize.height) - vd.width - vd.x;
2612 vp.width = vd.height;
2613 vp.height = vd.width;
2614 }
2615 }
2616
2617 vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
2618 0, // firstViewport
2619 1, // viewportCount
2620 &vp); // pViewports
2621 }
2622
2623 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
2624 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2625 const StateCache& stateCache)
2626 {
2627 const ScissorDesc& sd = renderCmd.scissorDesc;
2628
2629 VkRect2D sc {
2630 { sd.offsetX, sd.offsetY }, // offset
2631 { sd.extentWidth, sd.extentHeight }, // extent
2632 };
2633 // handle scissor for surface transform
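// the scissor rectangle is remapped the same way as the viewport, but in integer framebuffer coordinates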
2634 const LowLevelRenderPassDataVk& rpd = stateCache.lowLevelRenderPassData;
2635 if (rpd.surfaceTransformFlags > CORE_SURFACE_TRANSFORM_IDENTITY_BIT) {
2636 if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) ==
2637 CORE_SURFACE_TRANSFORM_ROTATE_90_BIT) {
2638 sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.height - sc.offset.y, sc.offset.x },
2639 { sc.extent.height, sc.extent.width } };
2640 } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) ==
2641 CORE_SURFACE_TRANSFORM_ROTATE_180_BIT) {
2642 sc = { { (int32_t)rpd.framebufferSize.width - (int32_t)sc.extent.width - sc.offset.x,
2643 (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.height - sc.offset.y },
2644 { sc.extent.width, sc.extent.height } };
2645 } else if ((rpd.surfaceTransformFlags & CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) ==
2646 CORE_SURFACE_TRANSFORM_ROTATE_270_BIT) {
2647 sc = { { sc.offset.y, (int32_t)rpd.framebufferSize.height - (int32_t)sc.extent.width - sc.offset.x },
2648 { sc.extent.height, sc.extent.width } };
2649 }
2650 }
2651
2652 vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
2653 0, // firstScissor
2654 1, // scissorCount
2655 &sc); // pScissors
2656 }
2657
2658 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
2659 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2660 const StateCache& stateCache)
2661 {
2662 vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
2663 renderCmd.lineWidth); // lineWidth
2664 }
2665
2666 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
2667 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2668 const StateCache& stateCache)
2669 {
2670 vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
2671 renderCmd.depthBiasConstantFactor, // depthBiasConstantFactor
2672 renderCmd.depthBiasClamp, // depthBiasClamp
2673 renderCmd.depthBiasSlopeFactor); // depthBiasSlopeFactor
2674 }
2675
2676 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
2677 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2678 const StateCache& stateCache)
2679 {
2680 vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
2681 renderCmd.blendConstants); // blendConstants[4]
2682 }
2683
2684 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
2685 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2686 const StateCache& stateCache)
2687 {
2688 vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
2689 renderCmd.minDepthBounds, // minDepthBounds
2690 renderCmd.maxDepthBounds); // maxDepthBounds
2691 }
2692
2693 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
2694 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2695 const StateCache& stateCache)
2696 {
2697 const auto stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;
2698
2699 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2700 vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
2701 stencilFaceMask, // faceMask
2702 renderCmd.mask); // compareMask
2703 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2704 vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
2705 stencilFaceMask, // faceMask
2706 renderCmd.mask); // writeMask
2707 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2708 vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
2709 stencilFaceMask, // faceMask
2710 renderCmd.mask); // reference
2711 }
2712 }
2713
2714 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
2715 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2716 const StateCache& stateCache)
2717 {
2718 #if (RENDER_VULKAN_FSR_ENABLED == 1)
2719 const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2720 if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
2721 const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
2722 const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
2723 (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
2724 (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
2725 };
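// the combiner ops select how the pipeline, primitive, and attachment fragment shading rates are combined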
2726
2727 extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
2728 &fragmentSize, // pFragmentSize
2729 combinerOps); // combinerOps
2730 }
2731 #endif
2732 }
2733
2734 void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
2735 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2736 const StateCache& stateCache)
2737 {
2738 if (renderCmd.command) {
2739 const RenderBackendRecordingStateVk recordingState = {
2740 {},
2741 cmdBuf.commandBuffer, // commandBuffer
2742 stateCache.lowLevelRenderPassData.renderPass, // renderPass
2743 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
2744 stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2745 stateCache.lowLevelRenderPassData.subpassIndex, // subpassIndex
2746 stateCache.pipelineLayout, // pipelineLayout
2747 };
2748 const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2749 renderCmd.command->ExecuteBackendCommand(lowLevelDevice, recordingState);
2750 } else if (stateCache.backendNode) {
2751 // legacy support for backend render nodes
2752 const RenderBackendRecordingStateVk recordingState = {
2753 {},
2754 cmdBuf.commandBuffer, // commandBuffer
2755 stateCache.lowLevelRenderPassData.renderPass, // renderPass
2756 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
2757 stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2758 stateCache.lowLevelRenderPassData.subpassIndex, // subpassIndex
2759 stateCache.pipelineLayout, // pipelineLayout
2760 };
2761 const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2762 stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
2763 }
2764 }
2765
2766 void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2767 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2768 {
2769 PLUGIN_ASSERT_MSG(false, "not implemented");
2770
2771 const auto pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
2772 const uint32_t queryIndex = renderCmd.queryIndex;
2773 VkQueryPool queryPool = VK_NULL_HANDLE;
2774
2775 vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2776 queryPool, // queryPool
2777 queryIndex, // firstQuery
2778 1); // queryCount
2779
2780 vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
2781 pipelineStageFlagBits, // pipelineStage
2782 queryPool, // queryPool
2783 queryIndex); // query
2784 }
2785
2786 void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
2787 {
2788 for (auto& presRef : presentationData_.infos) {
2789 if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
2790 continue;
2791 }
2792
2793 PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
2794 PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
2795
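// transition the swapchain image from its render graph processed layout to PRESENT_SRC_KHR so it can be presented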
2796 const GpuResourceState& state = presRef.renderGraphProcessedState;
2797 const auto srcAccessMask = (VkAccessFlags)state.accessFlags;
2798 const auto dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
2799 const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
2800 (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
2801 const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
2802 const auto oldLayout = (VkImageLayout)presRef.imageLayout;
2803 const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2804 // NOTE: queue is not currently checked (should be in the same queue as last time used)
2805 constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2806 constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2807 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
2808 constexpr VkImageSubresourceRange imageSubresourceRange {
2809 VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
2810 0, // baseMipLevel
2811 1, // levelCount
2812 0, // baseArrayLayer
2813 1, // layerCount
2814 };
2815
2816 const VkImageMemoryBarrier imageMemoryBarrier {
2817 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
2818 nullptr, // pNext
2819 srcAccessMask, // srcAccessMask
2820 dstAccessMask, // dstAccessMask
2821 oldLayout, // oldLayout
2822 newLayout, // newLayout
2823 srcQueueFamilyIndex, // srcQueueFamilyIndex
2824 dstQueueFamilyIndex, // dstQueueFamilyIndex
2825 presRef.swapchainImage, // image
2826 imageSubresourceRange, // subresourceRange
2827 };
2828
2829 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
2830 srcStageMask, // srcStageMask
2831 dstStageMask, // dstStageMask
2832 dependencyFlags, // dependencyFlags
2833 0, // memoryBarrierCount
2834 nullptr, // pMemoryBarriers
2835 0, // bufferMemoryBarrierCount
2836 nullptr, // pBufferMemoryBarriers
2837 1, // imageMemoryBarrierCount
2838 &imageMemoryBarrier); // pImageMemoryBarriers
2839
2840 presRef.presentationLayoutChangeNeeded = false;
2841 presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
2842 }
2843 }
2844
2845 #if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
2846 void RenderBackendVk::BeginDebugMarker(
2847 const LowLevelCommandBufferVk& cmdBuf, const BASE_NS::string_view name, const Math::Vec4 color)
2848 {
2849 if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
2850 const VkDebugUtilsLabelEXT label {
2851 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
2852 nullptr, // pNext
2853 name.data(), // pLabelName
2854 { color.x, color.y, color.z, color.w } // color[4]
2855 };
2856 deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuf.commandBuffer, &label);
2857 }
2858 }
2859
2860 void RenderBackendVk::EndDebugMarker(const LowLevelCommandBufferVk& cmdBuf)
2861 {
2862 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
2863 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuf.commandBuffer);
2864 }
2865 }
2866 #endif
2867
2868 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2869 void RenderBackendVk::RenderCommand(const RenderCommandBeginDebugMarker& renderCmd,
2870 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2871 const StateCache& stateCache)
2872 {
2873 BeginDebugMarker(cmdBuf, renderCmd.name, renderCmd.color);
2874 }
2875
2876 void RenderBackendVk::RenderCommand(const RenderCommandEndDebugMarker& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2877 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2878 {
2879 EndDebugMarker(cmdBuf);
2880 }
2881 #endif
2882
2883 #if (RENDER_PERF_ENABLED == 1)
2884
2885 void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
2886 {
2887 for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2888 const string_view& debugName = renderCommandContext.debugName;
2889 if (timers_.count(debugName) == 0) { // new timers
2890 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2891 PerfDataSet& perfDataSet = timers_[debugName];
2892 constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2893 perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
2894 constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
2895 perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
2896 #else
2897 timers_.insert({ debugName, {} });
2898 #endif
2899 }
2900 }
2901
2902 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2903 perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
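// cycle the write offset within the multi-frame query buffer so each frame's timestamp results land in their own region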
2904 perfGpuTimerData_.currentOffset =
2905 (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
2906 #endif
2907 }
2908
2909 void RenderBackendVk::EndFrameTimers()
2910 {
2911 int64_t fullGpuTime = 0;
2912 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2913 // already in micros
2914 fullGpuTime = perfGpuTimerData_.fullGpuCounter;
2915 perfGpuTimerData_.fullGpuCounter = 0;
2916
2917 perfGpuTimerData_.gpuBuffer->Unmap();
2918 #endif
2919 if (IPerformanceDataManagerFactory* globalPerfData =
2920 GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2921 globalPerfData) {
2922 IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
2923 perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2924 perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2925 perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2926 perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2927 perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2928 perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2929
2930 CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
2931 CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
2932 CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
2933 CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
2934 CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
2935 CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
2936 }
2937 // accumulate the per-timer counters into combined totals for tracing
2938 PerfCounters counters;
2939 for (auto& timer : timers_) {
2940 CopyPerfCounters(timer.second.perfCounters, counters);
2941 timer.second.perfCounters = {}; // reset perf counters
2942 }
2943
2944 CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(counters.drawCount));
2945 CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(counters.drawIndirectCount));
2946 CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(counters.dispatchCount));
2947 CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(counters.dispatchIndirectCount));
2948 CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(counters.renderPassCount));
2949 CORE_PROFILER_PLOT("Bind pipeline count", static_cast<int64_t>(counters.bindPipelineCount));
2950 CORE_PROFILER_PLOT("Bind descriptor set count", static_cast<int64_t>(counters.bindDescriptorSetCount));
2951 CORE_PROFILER_PLOT("Update descriptor set count", static_cast<int64_t>(counters.updateDescriptorSetCount));
2952 CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(counters.instanceCount));
2953 CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(counters.triangleCount));
2954 }
2955
2956 void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
2957 const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
2958 {
2959 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2960 if (stateCache.secondaryCommandBuffer) {
2961 return; // cannot be called inside render pass (e.g. with secondary command buffers)
2962 }
2963 PLUGIN_ASSERT(timers_.count(name) == 1);
2964 const PerfDataSet* perfDataSet = &timers_[name];
2965 if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
2966 const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2967 if (platData.queryPool) {
2968 vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2969 platData.queryPool, // queryPool
2970 queryIndex, // firstQuery
2971 1); // queryCount
2972
2973 vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
2974 stageFlagBits, // pipelineStage
2975 platData.queryPool, // queryPool
2976 queryIndex); // query
2977 }
2978 }
2979 #endif
2980 }
2981
2982 namespace {
2983 void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
2984 {
2985 perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount,
2986 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2987 perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount,
2988 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2989 perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount,
2990 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2991 perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount,
2992 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2993 perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount,
2994 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2995 perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount,
2996 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2997 perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount,
2998 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2999 perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount,
3000 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
3001 perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount,
3002 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
3003 perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount,
3004 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
3005 }
3006 } // namespace
3007
3008 void RenderBackendVk::CopyPerfTimeStamp(
3009 const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
3010 {
3011 PLUGIN_ASSERT(timers_.count(name) == 1);
3012 PerfDataSet* const perfDataSet = &timers_[name];
3013
3014 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
3015 // read back results written by earlier frames' queries on the CPU
3016 // and record a copy of this frame's query results into the GPU buffer
3017 const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
3018 int64_t gpuMicroSeconds = 0;
3019 {
3020 auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
3021 auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);
3022
3023 const uint64_t startStamp = *currentData;
3024 const uint64_t endStamp = *(currentData + 1);
3025
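// timestampPeriod is the number of nanoseconds per timestamp tick; convert the tick delta to microseconds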
3026 const double timestampPeriod =
3027 static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
3028 .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
3029 constexpr int64_t nanosToMicrosDivisor { 1000 };
3030 gpuMicroSeconds =
3031 static_cast<int64_t>(static_cast<double>(endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
3032 constexpr int64_t maxValidMicroSecondValue { 4294967295 };
3033 if (gpuMicroSeconds > maxValidMicroSecondValue) {
3034 gpuMicroSeconds = 0;
3035 }
3036 perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
3037 }
3038 #endif
3039 const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();
3040
3041 if (IPerformanceDataManagerFactory* globalPerfData =
3042 GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
3043 globalPerfData) {
3044 IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
3045
3046 perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
3047 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
3048 perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
3049
3050 // cannot be called inside render pass (e.g. with secondary command buffers)
3051 if (!stateCache.secondaryCommandBuffer) {
3052 if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
3053 const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
3054
3055 const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
3056 PLUGIN_ASSERT(gpuBuffer);
3057 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
3058
3059 constexpr uint32_t queryCount = 2;
3060 constexpr VkDeviceSize queryStride = sizeof(uint64_t);
3061 constexpr VkQueryResultFlags queryResultFlags =
3062 VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;
3063
3064 if (platData.queryPool) {
3065 vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
3066 platData.queryPool, // queryPool
3067 0, // firstQuery
3068 queryCount, // queryCount
3069 platBuffer.buffer, // dstBuffer
3070 currentFrameByteOffset, // dstOffset
3071 queryStride, // stride
3072 queryResultFlags); // flags
3073 }
3074 }
3075 }
3076 #endif
3077 UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
3078 }
3079 }
3080
3081 #endif
3082 RENDER_END_NAMESPACE()
3083