/*
 * Copyright (C) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_vk.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vulkan/vulkan.h>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/containers/string_view.h>
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#include <core/plugin/intf_class_register.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/device/pipeline_state_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_backend_node.h>
#include <render/vulkan/intf_device_vk.h>

#if (RENDER_PERF_ENABLED == 1)
#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#include "vulkan/gpu_query_vk.h"
#endif

#include "device/gpu_acceleration_structure.h"
#include "device/gpu_buffer.h"
#include "device/gpu_image.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "device/gpu_sampler.h"
#include "device/pipeline_state_object.h"
#include "device/render_frame_sync.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pool_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "render_backend.h"
#include "render_graph.h"
#include "util/log.h"
#include "vulkan/gpu_acceleration_structure_vk.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

using CORE_NS::GetInstance;
using CORE_NS::IParallelTaskQueue;
using CORE_NS::IPerformanceDataManager;
using CORE_NS::IPerformanceDataManagerFactory;
using CORE_NS::ITaskQueueFactory;
using CORE_NS::IThreadPool;
using CORE_NS::UID_TASK_QUEUE_FACTORY;

RENDER_BEGIN_NAMESPACE()
namespace {
#if (RENDER_VULKAN_RT_ENABLED == 1)
inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
{
    const VkBufferDeviceAddressInfo addressInfo {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
        nullptr, // pNext
        buffer, // buffer
    };
    return vkGetBufferDeviceAddress(device, &addressInfo);
}
#endif
} // namespace

// Helper class for running std::function as a ThreadPool task.
class FunctionTask final : public IThreadPool::ITask {
public:
    static Ptr Create(std::function<void()> func)
    {
        return Ptr { new FunctionTask(func) };
    }

    explicit FunctionTask(std::function<void()> func) : func_(func) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    std::function<void()> func_;
};

#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
namespace {
constexpr const string_view COMMAND_NAMES[] = { "Undefined", "Draw", "DrawIndirect", "Dispatch", "DispatchIndirect",
    "BindPipeline", "BeginRenderPass", "NextSubpass", "EndRenderPass", "BindVertexBuffers", "BindIndexBuffer",
    "CopyBuffer", "CopyBufferImage", "BlitImage", "BarrierPoint", "UpdateDescriptorSets", "BindDescriptorSets",
    "PushConstant", "DynamicStateViewport", "DynamicStateScissor", "DynamicStateLineWidth", "DynamicStateDepthBias",
    "DynamicStateBlendConstants", "DynamicStateDepthBounds", "DynamicStateStencil", "ExecuteBackendFramePosition",
    "WriteTimestamp", "GpuQueueTransferRelease", "GpuQueueTransferAcquire" };
} // namespace
#endif

#if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
namespace {
static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
}
#endif

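// Backend constructor. With profiling enabled it reserves a host-visible readback buffer large enough to hold the
// begin/end GPU timestamps of every query object for every buffered frame (see TIME_STAMP_PER_GPU_QUERY).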
RenderBackendVk::RenderBackendVk(
    Device& dev, GpuResourceManager& gpuResourceManager, const CORE_NS::IParallelTaskQueue::Ptr& queue)
    : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
      queue_(queue.get())
{
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();

    constexpr uint32_t maxQueryObjectCount { 512u };
    constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
    const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
    const GpuBufferDesc desc {
        BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT, // usageFlags
        CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
        0, // engineCreationFlags
        fullByteSize, // byteSize
    };
    perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
    perfGpuTimerData_.currentOffset = 0;
    perfGpuTimerData_.frameByteSize = byteSize;
    perfGpuTimerData_.fullByteSize = fullByteSize;
    { // zero initialize
        uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
        memset_s(bufferData, fullByteSize, 0, fullByteSize);
        perfGpuTimerData_.gpuBuffer->Unmap();
    }
#endif
#endif
}

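// Acquires the next swapchain image (when a swapchain back buffer is in use), remaps the engine back buffer handle
// to the acquired image, and records whether a final layout transition to PRESENT_SRC is still needed.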
void RenderBackendVk::AcquirePresentationInfo(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    presentationInfo_ = {};
    if ((backBufferConfig.config.backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) &&
        device_.HasSwapchain()) {
        presentationInfo_.useSwapchain = true;
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;

        const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain());
        if (swapchain) {
            const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
            const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
            presentationInfo_.swapchainSemaphore = platSwapchain.swapchainImages.semaphore;

            const VkResult result = vkAcquireNextImageKHR(device, // device
                vkSwapchain, // swapchain
                UINT64_MAX, // timeout
                presentationInfo_.swapchainSemaphore, // semaphore
                (VkFence) nullptr, // fence
                &presentationInfo_.swapchainImageIndex); // pImageIndex

            switch (result) {
                // Success
                case VK_SUCCESS:
                case VK_TIMEOUT:
                case VK_NOT_READY:
                case VK_SUBOPTIMAL_KHR:
                    presentationInfo_.validAcquire = true;
                    break;

                // Failure
                case VK_ERROR_OUT_OF_HOST_MEMORY:
                case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
                    return;
                case VK_ERROR_DEVICE_LOST:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
                    return;
                case VK_ERROR_OUT_OF_DATE_KHR:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
                    return;
                case VK_ERROR_SURFACE_LOST_KHR:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
                    return;

                case VK_EVENT_SET:
                case VK_EVENT_RESET:
                case VK_INCOMPLETE:
                case VK_ERROR_INITIALIZATION_FAILED:
                case VK_ERROR_MEMORY_MAP_FAILED:
                case VK_ERROR_LAYER_NOT_PRESENT:
                case VK_ERROR_EXTENSION_NOT_PRESENT:
                case VK_ERROR_FEATURE_NOT_PRESENT:
                case VK_ERROR_INCOMPATIBLE_DRIVER:
                case VK_ERROR_TOO_MANY_OBJECTS:
                case VK_ERROR_FORMAT_NOT_SUPPORTED:
                case VK_ERROR_FRAGMENTED_POOL:
                case VK_ERROR_OUT_OF_POOL_MEMORY:
                case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                case VK_ERROR_VALIDATION_FAILED_EXT:
                case VK_ERROR_INVALID_SHADER_NV:
                // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
                case VK_ERROR_FRAGMENTATION_EXT:
                case VK_ERROR_NOT_PERMITTED_EXT:
                // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
                case VK_RESULT_MAX_ENUM:
                default:
                    PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
                    PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
                    device_.SetDeviceStatus(false);
                    break;
            }

            PLUGIN_ASSERT(
                presentationInfo_.swapchainImageIndex < (uint32_t)platSwapchain.swapchainImages.images.size());

            // remap image to backbuffer
            const RenderHandle backBufferHandle =
                gpuResourceMgr_.GetImageRawHandle(backBufferConfig.config.backBufferName);
            const RenderHandle currentSwapchainHandle = gpuResourceMgr_.GetImageRawHandle(
                "CORE_DEFAULT_SWAPCHAIN_" + to_string(presentationInfo_.swapchainImageIndex));
            // special swapchain remapping
            gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(backBufferHandle, currentSwapchainHandle);
            presentationInfo_.renderGraphProcessedState = backBufferConfig.backBufferState;
            presentationInfo_.imageLayout = backBufferConfig.layout;
            if (presentationInfo_.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR) {
                presentationInfo_.presentationLayoutChangeNeeded = true;
                presentationInfo_.renderNodeCommandListIndex =
                    static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);

                const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(backBufferHandle);
                PLUGIN_ASSERT(swapImage);
                presentationInfo_.swapchainImage = swapImage->GetPlatformData().image;
            }
        }
    }

#if (RENDER_VALIDATION_ENABLED == 1)
    if ((backBufferConfig.config.backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) &&
        (!device_.HasSwapchain())) {
        PLUGIN_LOG_E("RENDER_VALIDATION: trying to present without swapchain");
    }
#endif
}

void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (presentationInfo_.useSwapchain && backBufferConfig.config.present && presentationInfo_.validAcquire) {
        PLUGIN_ASSERT(!presentationInfo_.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presentationInfo_.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR);
#if (RENDER_PERF_ENABLED == 1)
        commonCpuTimers_.present.Begin();
#endif
        const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain());
        if (swapchain) {
            const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
            const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;

            PLUGIN_ASSERT(
                presentationInfo_.swapchainImageIndex < (uint32_t)platSwapchain.swapchainImages.images.size());

            // NOTE: currently waits for the last valid submission semaphore (backtraces here for valid semaphore)
            VkSemaphore waitSemaphore = VK_NULL_HANDLE;
            uint32_t waitSemaphoreCount = 0;
            if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
                waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
                waitSemaphoreCount = 1;
            }

            const VkPresentInfoKHR presentInfo {
                VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
                nullptr, // pNext
                waitSemaphoreCount, // waitSemaphoreCount
                &waitSemaphore, // pWaitSemaphores
                1, // swapchainCount
                &vkSwapchain, // pSwapchains
                &presentationInfo_.swapchainImageIndex, // pImageIndices
                nullptr // pResults
            };

            const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
            const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
                &presentInfo); // pPresentInfo

            switch (result) {
                // Success
                case VK_SUCCESS:
                    break;
                case VK_SUBOPTIMAL_KHR:
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
#endif
                    break;

                // Failure
                case VK_ERROR_OUT_OF_HOST_MEMORY:
                case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                    PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
                    return;
                case VK_ERROR_DEVICE_LOST:
                    PLUGIN_LOG_E("vkQueuePresentKHR device lost");
                    return;
                case VK_ERROR_OUT_OF_DATE_KHR:
                    PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
                    return;
                case VK_ERROR_SURFACE_LOST_KHR:
                    PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
                    return;

                case VK_NOT_READY:
                case VK_TIMEOUT:
                case VK_EVENT_SET:
                case VK_EVENT_RESET:
                case VK_INCOMPLETE:
                case VK_ERROR_INITIALIZATION_FAILED:
                case VK_ERROR_MEMORY_MAP_FAILED:
                case VK_ERROR_LAYER_NOT_PRESENT:
                case VK_ERROR_EXTENSION_NOT_PRESENT:
                case VK_ERROR_FEATURE_NOT_PRESENT:
                case VK_ERROR_INCOMPATIBLE_DRIVER:
                case VK_ERROR_TOO_MANY_OBJECTS:
                case VK_ERROR_FORMAT_NOT_SUPPORTED:
                case VK_ERROR_FRAGMENTED_POOL:
                case VK_ERROR_OUT_OF_POOL_MEMORY:
                case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                case VK_ERROR_VALIDATION_FAILED_EXT:
                case VK_ERROR_INVALID_SHADER_NV:
                case VK_ERROR_FRAGMENTATION_EXT:
                case VK_ERROR_NOT_PERMITTED_EXT:
                case VK_RESULT_MAX_ENUM:
                default:
                    PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
                    PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
                    break;
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        {
            commonCpuTimers_.present.End();
        }
#endif
    }
}

void RenderBackendVk::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif

    commandBufferSubmitter_ = {};
    commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());

    AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
    if (presentationInfo_.useSwapchain && (!presentationInfo_.validAcquire)) {
        return;
    }

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif

    // command list process loop/execute
    RenderProcessCommandLists(renderCommandFrameData);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
    commonCpuTimers_.submit.Begin();
#endif

    PLUGIN_ASSERT(
        renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
    // submit vulkan command buffers
    RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.submit.End();
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

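// Submits the recorded command buffers in order. The last valid submission signals the frame fence and, when
// presenting, the semaphore that vkQueuePresentKHR waits on; per-node wait semaphores are chained in between.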
void RenderBackendVk::RenderProcessSubmitCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: currently backtraces to final valid command buffer semaphore
    uint32_t finalCommandBufferSubmissionIndex = ~0u;
    commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
    for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
         --cmdBufferIdx) {
        if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
            (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
                VK_NULL_HANDLE)) {
            finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
            break;
        }
    }

    for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
        const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
        if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
            continue;
        }

        const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];

        uint32_t waitSemaphoreCount = 0u;
        VkSemaphore waitSemaphores[PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + 1]; // + 1 for swapchain
        // + 1 for the swapchain acquire wait as well
        VkPipelineStageFlags
            waitSemaphorePipelineStageFlags[PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + 1];
        for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
            const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
            PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());

            VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
            if (waitSemaphore != VK_NULL_HANDLE) {
                waitSemaphores[waitSemaphoreCount] = waitSemaphore;
                waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                waitSemaphoreCount++;
            }
        }

        if ((renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
            (presentationInfo_.swapchainSemaphore != VK_NULL_HANDLE)) {
            waitSemaphores[waitSemaphoreCount] = presentationInfo_.swapchainSemaphore;
            waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
            waitSemaphoreCount++;
        }

        uint32_t signalSemaphoreCount = 0u;
        VkSemaphore semaphores[] = { VK_NULL_HANDLE, VK_NULL_HANDLE };
        VkFence fence = VK_NULL_HANDLE;
        if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
            // add fence signaling to last submission for frame sync
            if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
                fence = frameSync->GetFrameFence().fence;
                frameSync->FrameFenceIsSignalled();
            }

            if (presentationInfo_.useSwapchain && backBufferConfig.config.present) {
                commandBufferSubmitter_.presentationWaitSemaphore =
                    commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
                semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
            }
            if (backBufferConfig.config.gpuSemaphoreHandle != 0) {
                semaphores[signalSemaphoreCount++] =
                    VulkanHandleCast<VkSemaphore>(backBufferConfig.config.gpuSemaphoreHandle);
            }
        } else if (renderContextRef.submitDepencies.signalSemaphore) {
            semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
        }
        PLUGIN_ASSERT(signalSemaphoreCount <= 2); // 2: no more than 2 semaphores

        const VkSubmitInfo submitInfo {
            VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
            nullptr, // pNext
            waitSemaphoreCount, // waitSemaphoreCount
            waitSemaphores, // pWaitSemaphores
            waitSemaphorePipelineStageFlags, // pWaitDstStageMask
            1, // commandBufferCount
            &cmdSubmitterRef.commandBuffer, // pCommandBuffers
            signalSemaphoreCount, // signalSemaphoreCount
            semaphores, // pSignalSemaphores
        };

        const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
        VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
            1, // submitCount
            &submitInfo, // pSubmits
            fence)); // fence
    }
}

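// Records all render command lists into Vulkan command buffers, either through the parallel task queue when one is
// available or serially on the calling thread. A multi-render-command-list render pass is recorded as one task.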
void RenderBackendVk::RenderProcessCommandLists(RenderCommandFrameData& renderCommandFrameData)
{
    if (queue_) {
        for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < (uint32_t)renderCommandFrameData.renderCommandContexts.size();) {
            // NOTE: idx increase
            // NOTE: currently does not multi-thread dependent multi render command list render passes
            const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
            PLUGIN_ASSERT(ref.multiRenderCommandListCount > 0);
            const uint32_t rcCount = ref.multiRenderCommandListCount;
            queue_->Submit(cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                MultiRenderCommandListDesc mrcDesc;
                mrcDesc.multiRenderCommandListCount = rcCount;
                mrcDesc.baseContext =
                    (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;

                for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                    const uint32_t currIdx = cmdBufferIdx + rcIdx;
                    mrcDesc.multiRenderCommandListIndex = rcIdx;
                    RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                    const DebugNames debugNames { ref2.debugName,
                        renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                    RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
                }
            }));

            cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
        }

        // Execute and wait for completion.
        queue_->Execute();
        queue_->Clear();
    } else {
        for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < (uint32_t)renderCommandFrameData.renderCommandContexts.size();) {
            // NOTE: idx increase
            const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
            PLUGIN_ASSERT(ref.multiRenderCommandListCount > 0);
            const uint32_t rcCount = ref.multiRenderCommandListCount;

            MultiRenderCommandListDesc mrcDesc;
            mrcDesc.multiRenderCommandListCount = rcCount;
            mrcDesc.baseContext = (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;

            for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                const uint32_t currIdx = cmdBufferIdx + rcIdx;
                mrcDesc.multiRenderCommandListIndex = rcIdx;
                RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                const DebugNames debugNames { ref2.debugName,
                    renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
            }
            cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
        }
    }
}

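// Records a single render command list into its context command buffer. For multi-render-command-list render passes
// the command buffer is shared: only the first list begins it and only the last list ends it and stores it for submit.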
void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
{
    // these are validated in render graph
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.contextPoolMgr;

    ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();

    StateCache stateCache = {}; // state cache for this render command list
    stateCache.backendNode = renderCommandCtx.renderBackendNode;

    // the command buffer is waited on with a single frame fence
    const bool multiRenderCommandList =
        (multiRenderCommandListDesc.multiRenderCommandListCount > 1 && multiRenderCommandListDesc.baseContext);
    const bool beginCommandBuffer =
        (!multiRenderCommandList || (multiRenderCommandListDesc.multiRenderCommandListIndex == 0));
    const bool endCommandBuffer =
        (!multiRenderCommandList || (multiRenderCommandListDesc.multiRenderCommandListIndex ==
                                        multiRenderCommandListDesc.multiRenderCommandListCount - 1));
    const ContextCommandPoolVk* ptrCmdPool = nullptr;
    if (multiRenderCommandList) {
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(multiRenderCommandListDesc.baseContext->contextPoolMgr))
                          ->GetContextCommandPool();
    } else {
        ptrCmdPool = &((NodeContextPoolManagerVk&)contextPoolMgr).GetContextCommandPool();
    }
    PLUGIN_ASSERT(ptrCmdPool);
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
    const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
#endif
    PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
    PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
#endif

    if (beginCommandBuffer) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool->commandPool, // commandPool
            commandPoolResetFlags)); // flags

        constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
            VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
        };
        const VkCommandBufferBeginInfo commandBufferBeginInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
            nullptr, // pNext
            commandBufferUsageFlags, // flags
            nullptr, // pInheritanceInfo
        };

        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo)); // pBeginInfo

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
            PLUGIN_ASSERT(gpuQuery);

            gpuQuery->NextQueryIndex();

            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        }
#endif
        perfDataSet->cpuTimer.Begin();
#endif
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
        const VkDebugUtilsLabelEXT label {
            VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
            nullptr, // pNext
            debugNames.renderCommandListName.data(), // pLabelName
            { 1.f, 1.f, 1.f, 1.f } // color[4]
        };
        deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
    }
#endif

    for (const auto& ref : rcRef) {
        PLUGIN_ASSERT(ref.rc);
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
            const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
            const VkDebugUtilsLabelEXT label {
                VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
                nullptr, // pNext
                COMMAND_NAMES[index].data(), // pLabelName
                { 0.87f, 0.83f, 0.29f, 1.f } // color[4]
            };
            deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
        }
#endif

        switch (ref.type) {
            case RenderCommandType::BARRIER_POINT: {
                const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);

                // handle all barriers before render command that needs resource syncing
                RenderCommand(
                    barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
                break;
            }
            case RenderCommandType::DRAW: {
                RenderCommand(
                    *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH: {
                RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::UPDATE_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandUpdateDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
                RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_STENCIL: {
                RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
                RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            //
            case RenderCommandType::WRITE_TIMESTAMP: {
                RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::UNDEFINED:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            default: {
                PLUGIN_ASSERT(false && "non-valid render command");
                break;
            }
        }
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
            deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
        }
#endif
    }

    if (presentationInfo_.renderNodeCommandListIndex == cmdBufIdx) {
        RenderPresentationLayout(cmdBuffer);
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
        deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
    }
#endif

    if (endCommandBuffer) {
#if (RENDER_PERF_ENABLED == 1)
        perfDataSet->cpuTimer.End();
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        if (validGpuQueries) {
            WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
                VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
        }
#endif
        CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
#endif

        VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer

        commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
    }
}

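// Binds a compute or graphics pipeline. The pipeline state object is fetched from the PSO manager; graphics PSOs are
// keyed by the render pass compatibility hash combined with the descriptor set layout hash stored in the state cache.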
void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
{
    const RenderHandle psoHandle = renderCmd.psoHandle;
    const VkPipelineBindPoint pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;

    stateCache.psoHandle = psoHandle;

    VkPipeline pipeline { VK_NULL_HANDLE };
    VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
    if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
        const ComputePipelineStateObjectVk* pso = static_cast<const ComputePipelineStateObjectVk*>(
            psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
        if (pso) {
            const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
            pipeline = plat.pipeline;
            pipelineLayout = plat.pipelineLayout;
        }
    } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
        PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
        if (stateCache.renderCommandBeginRenderPass) {
            uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
            if (stateCache.pipelineDescSetHash != 0) {
                HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
            }
            const GraphicsPipelineStateObjectVk* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
                psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
                    stateCache.renderCommandBeginRenderPass->subpasses,
                    stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
                    &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
            if (pso) {
                const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
                pipeline = plat.pipeline;
                pipelineLayout = plat.pipelineLayout;
            }
        }
    }
    PLUGIN_ASSERT(pipeline);
    PLUGIN_ASSERT(pipelineLayout);

    const bool valid = (pipeline != VK_NULL_HANDLE);
    if (valid) {
        stateCache.pipelineLayout = pipelineLayout;
        stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
        vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
            pipelineBindPoint, // pipelineBindPoint
            pipeline); // pipeline
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.bindPipelineCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    if (renderCmd.indexCount) {
        vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
            renderCmd.indexCount, // indexCount
            renderCmd.instanceCount, // instanceCount
            renderCmd.firstIndex, // firstIndex
            renderCmd.vertexOffset, // vertexOffset
            renderCmd.firstInstance); // firstInstance
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.drawCount++;
        stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
        stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
    } else {
        vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
            renderCmd.vertexCount, // vertexCount
            renderCmd.instanceCount, // instanceCount
            renderCmd.firstVertex, // firstVertex
            renderCmd.firstInstance); // firstInstance
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.drawCount++;
        stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
        stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertices per triangle
            * renderCmd.instanceCount;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
        if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
            vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
                buffer, // buffer
                offset, // offset
                renderCmd.drawCount, // drawCount
                renderCmd.stride); // stride
        } else {
            vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
                buffer, // buffer
                (VkDeviceSize)renderCmd.offset, // offset
                renderCmd.drawCount, // drawCount
                renderCmd.stride); // stride
        }
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.drawIndirectCount++;
#endif
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.groupCountX, // groupCountX
        renderCmd.groupCountY, // groupCountY
        renderCmd.groupCountZ); // groupCountZ
#if (RENDER_PERF_ENABLED == 1)
    stateCache.perfCounters.dispatchCount++;
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
        vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
            buffer, // buffer
            offset); // offset
#if (RENDER_PERF_ENABLED == 1)
        stateCache.perfCounters.dispatchIndirectCount++;
#endif
    }
}

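// Begins (or continues) a render pass. Clear values are collected from the attachments and the render area is
// clamped to the framebuffer size before vkCmdBeginRenderPass; subpass-begin entries only advance the subpass.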
void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
    stateCache.renderCommandBeginRenderPass = &renderCmd;

    NodeContextPoolManagerVk& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
    // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
    stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);

    // early out for multi render command list render pass
    if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
        constexpr VkSubpassContents subpassContents { VkSubpassContents::VK_SUBPASS_CONTENTS_INLINE };
        vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
            subpassContents); // contents

        return; // early out
    }

    const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;

    VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
    bool hasClearValues = false;
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        const auto& ref = renderPassDesc.attachments[idx];
        if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
            ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
            const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
            VkClearValue clearValue;
            if (RenderHandleUtil::IsDepthImage(handle)) {
                PLUGIN_STATIC_ASSERT(sizeof(clearValue.depthStencil) == sizeof(ref.clearValue.depthStencil));
                clearValue.depthStencil.depth = ref.clearValue.depthStencil.depth;
                clearValue.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
            } else {
                PLUGIN_STATIC_ASSERT(sizeof(clearValue.color) == sizeof(ref.clearValue.color));
                if (!CloneData(&clearValue.color, sizeof(clearValue.color), &ref.clearValue.color,
                        sizeof(ref.clearValue.color))) {
                    PLUGIN_LOG_E("Copying of clearValue.color failed.");
                }
            }
            clearValues[idx] = clearValue;
            hasClearValues = true;
        }
    }

    // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
    // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
    const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;

    VkRect2D renderArea {
        { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
        { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
    };
    // render area needs to be inside frame buffer
    const auto& lowLevelData = stateCache.lowLevelRenderPassData;
    renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
    renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
    renderArea.extent.width = Math::min(renderArea.extent.width,
        static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
    renderArea.extent.height = Math::min(renderArea.extent.height,
        static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));

    const VkRenderPassBeginInfo renderPassBeginInfo {
        VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // sType
        nullptr, // pNext
        stateCache.lowLevelRenderPassData.renderPass, // renderPass
        stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
        renderArea, // renderArea
        clearValueCount, // clearValueCount
        clearValues, // pClearValues
    };

    const VkSubpassContents subpassContents = (VkSubpassContents)renderPassDesc.subpassContents;
    vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
        &renderPassBeginInfo, // pRenderPassBegin
        subpassContents); // contents
#if (RENDER_PERF_ENABLED == 1)
    stateCache.perfCounters.renderPassCount++;
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);

    const VkSubpassContents subpassContents = (VkSubpassContents)renderCmd.subpassContents;
    vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
        subpassContents); // contents
}

void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
{
    PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);

    // early out for multi render command list render pass
    if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
        return; // NOTE
    }

    stateCache.renderCommandBeginRenderPass = nullptr;
    stateCache.lowLevelRenderPassData = {};

    vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
}

void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
    PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);

    const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;

    VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    const GpuBufferVk* gpuBuffer = nullptr;
    RenderHandle currBufferHandle;
    for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
        const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
        // our importer usually uses same GPU buffer for all vertex buffers in single primitive
        // do not re-fetch the buffer if not needed
        if (currBufferHandle.id != currVb.bufferHandle.id) {
            currBufferHandle = currVb.bufferHandle;
            gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
        }
        PLUGIN_ASSERT(gpuBuffer);
        if (gpuBuffer) {
            const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
            const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
            vertexBuffers[idx] = plat.buffer;
            offsets[idx] = offset;
        }
    }

    vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
        0, // firstBinding
        vertexBufferCount, // bindingCount
        vertexBuffers, // pBuffers
        offsets); // pOffsets
}

void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);

    PLUGIN_ASSERT(gpuBuffer);
    if (gpuBuffer) {
        const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
        const VkBuffer buffer = plat.buffer;
        const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
        const VkIndexType indexType = (VkIndexType)renderCmd.indexBuffer.indexType;

        vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
            buffer, // buffer
            offset, // offset
            indexType); // indexType
    }
}

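// Blits between two images; GPU_IMAGE_ALL_LAYERS is resolved to the actual array layer counts of the source and
// destination images before building the VkImageBlit region.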
void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
    const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
    if (srcImagePtr && dstImagePtr) {
        const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
        const GpuImagePlatformDataVk& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();

        const ImageBlit& ib = renderCmd.imageBlit;
        const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? srcPlatImage.arrayLayers
                                           : ib.srcSubresource.layerCount;
        const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
                                           ? dstPlatImage.arrayLayers
                                           : ib.dstSubresource.layerCount;

        const VkImageSubresourceLayers srcSubresourceLayers {
            (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
            ib.srcSubresource.mipLevel, // mipLevel
            ib.srcSubresource.baseArrayLayer, // baseArrayLayer
            srcLayerCount, // layerCount
        };
        const VkImageSubresourceLayers dstSubresourceLayers {
            (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
            ib.dstSubresource.mipLevel, // mipLevel
            ib.dstSubresource.baseArrayLayer, // baseArrayLayer
            dstLayerCount, // layerCount
        };

        const VkImageBlit imageBlit {
            srcSubresourceLayers, // srcSubresource

            { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
                { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
                    (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]

            dstSubresourceLayers, // dstSubresource

            { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
                { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
                    (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
        };

        vkCmdBlitImage(cmdBuf.commandBuffer, // commandBuffer
            srcPlatImage.image, // srcImage
            (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout
            dstPlatImage.image, // dstImage
            (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
            1, // regionCount
            &imageBlit, // pRegions
            (VkFilter)renderCmd.filter); // filter
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
    const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);

    PLUGIN_ASSERT(srcGpuBuffer);
    PLUGIN_ASSERT(dstGpuBuffer);

    if (srcGpuBuffer && dstGpuBuffer) {
        const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
        const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
        const VkBufferCopy bufferCopy {
            renderCmd.bufferCopy.srcOffset,
            renderCmd.bufferCopy.dstOffset,
            renderCmd.bufferCopy.size,
        };

        if (bufferCopy.size > 0) {
            vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
                srcBuffer, // srcBuffer
                dstBuffer, // dstBuffer
                1, // regionCount
                &bufferCopy); // pRegions
        }
    }
}

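// Copies between a buffer and an image in either direction; the copy extent is clamped against the selected mip
// level size so the region stays inside the image.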
RenderCommand(const RenderCommandCopyBufferImage & renderCmd,const LowLevelCommandBufferVk & cmdBuf,NodeContextPsoManager & psoMgr,const NodeContextPoolManager & poolMgr,const StateCache & stateCache)1248 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1249 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1250 const StateCache& stateCache)
1251 {
1252 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1253 PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1254 return;
1255 }
1256
1257 const GpuBufferVk* gpuBuffer = nullptr;
1258 const GpuImageVk* gpuImage = nullptr;
1259 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1260 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1261 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1262 } else {
1263 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1264 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1265 }
1266
1267 if (gpuBuffer && gpuImage) {
1268 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1269 const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1270 const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1271 const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1272 ? platImage.arrayLayers
1273 : subresourceLayer.layerCount;
1274 const VkImageSubresourceLayers imageSubresourceLayer {
1275 (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1276 subresourceLayer.mipLevel,
1277 subresourceLayer.baseArrayLayer,
1278 layerCount,
1279 };
1280 const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1281         // Math::min clamps the copy region so it stays inside the image
1282 const uint32_t mip = subresourceLayer.mipLevel;
1283 const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
1284 const Size3D& imageOffset = bufferImageCopy.imageOffset;
1285 const VkExtent3D imageExtent = {
1286 Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1287 Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1288 Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1289 };
1290 const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1291 (imageOffset.depth < imageSize.depth);
1292 const VkBufferImageCopy bufferImageCopyVk {
1293 bufferImageCopy.bufferOffset,
1294 bufferImageCopy.bufferRowLength,
1295 bufferImageCopy.bufferImageHeight,
1296 imageSubresourceLayer,
1297 { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1298 static_cast<int32_t>(imageOffset.depth) },
1299 imageExtent,
1300 };
1301
1302 const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1303 const VkImage image = (gpuImage->GetPlatformData()).image;
1304
1305 if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1306 vkCmdCopyBufferToImage(cmdBuf.commandBuffer, // commandBuffer
1307 buffer, // srcBuffer
1308 image, // dstImage
1309 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1310 1, // regionCount
1311 &bufferImageCopyVk); // pRegions
1312 } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1313 vkCmdCopyImageToBuffer(cmdBuf.commandBuffer, // commandBuffer
1314 image, // srcImage
1315 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1316 buffer, // dstBuffer
1317 1, // regionCount
1318 &bufferImageCopyVk); // pRegions
1319 }
1320 }
1321 }
1322
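// Copies a region between two GPU images with vkCmdCopyImage. The extent is clamped against both the source
// and destination dimensions (taking the offsets into account) before the command is recorded.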
1323 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1324 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1325 {
1326 const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1327 const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1328 if (srcGpuImage && dstGpuImage) {
1329 const ImageCopy& copy = renderCmd.imageCopy;
1330 const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1331 const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1332
1333 const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1334 const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1335 const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1336 ? srcPlatImage.arrayLayers
1337 : srcSubresourceLayer.layerCount;
1338 const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1339 ? dstPlatImage.arrayLayers
1340 : dstSubresourceLayer.layerCount;
1341
1342 const VkImageSubresourceLayers srcImageSubresourceLayer {
1343 (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1344 srcSubresourceLayer.mipLevel,
1345 srcSubresourceLayer.baseArrayLayer,
1346 srcLayerCount,
1347 };
1348 const VkImageSubresourceLayers dstImageSubresourceLayer {
1349 (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1350 dstSubresourceLayer.mipLevel,
1351 dstSubresourceLayer.baseArrayLayer,
1352 dstLayerCount,
1353 };
1354
1355 const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1356 const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1357
1358 VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1359 ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1360 ext.height =
1361 Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1362 ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1363
1364 const VkImageCopy imageCopyVk {
1365 srcImageSubresourceLayer, // srcSubresource
1366 { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1367 dstImageSubresourceLayer, // dstSubresource
1368 { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1369 ext, // extent
1370 };
1371 vkCmdCopyImage(cmdBuf.commandBuffer, // commandBuffer
1372 srcPlatImage.image, // srcImage
1373 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1374 dstPlatImage.image, // dstImage
1375 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1376 1, // regionCount
1377 &imageCopyVk); // pRegions
1378 }
1379 }
1380
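// Translates the barriers collected for a barrier point into Vulkan memory, buffer, and image barriers.
// Barriers are gathered into fixed-size batches (maxBarrierCount per type) and flushed with a
// vkCmdPipelineBarrier call whenever a batch fills up or the barrier list ends.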
1381 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1382 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1383 const RenderBarrierList& rbl)
1384 {
1385 if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1386 return;
1387 }
1388
1389 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1390 rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1391 PLUGIN_ASSERT(barrierPointBarriers);
1392 if (!barrierPointBarriers) {
1393 return;
1394 }
1395 constexpr uint32_t maxBarrierCount { 8 };
1396 VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1397 VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1398 VkMemoryBarrier memoryBarriers[maxBarrierCount];
1399
1400     // generally there is only a single barrier list per barrier point
1401     // in situations with batched render passes there can be many
1402     // NOTE: all barrier lists could be combined into a single vk command if needed
1403     // NOTE: memory and pipeline barriers should be allowed on the front-end side
1404 const uint32_t barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
1405 const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1406 #if (RENDER_VALIDATION_ENABLED == 1)
1407 uint32_t fullBarrierCount = 0u;
1408 #endif
1409 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1410         if (nextBarrierList == nullptr) { // should never be null; checked for safety
1411 PLUGIN_ASSERT(false);
1412 return;
1413 }
1414 const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1415 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1416 const uint32_t barrierCount = (uint32_t)barrierListRef.count;
1417
1418 uint32_t bufferBarrierIdx = 0;
1419 uint32_t imageBarrierIdx = 0;
1420 uint32_t memoryBarrierIdx = 0;
1421
1422 VkPipelineStageFlags srcPipelineStageMask { 0 };
1423 VkPipelineStageFlags dstPipelineStageMask { 0 };
1424 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1425
1426 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1427 const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1428
1429 uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1430 uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1431 if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1432 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1433 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1434 }
1435
1436 const RenderHandle resourceHandle = ref.resourceHandle;
1437 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1438
1439 PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1440 (handleType == RenderHandleType::GPU_IMAGE));
1441
1442 const VkAccessFlags srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1443 const VkAccessFlags dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1444
1445 srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1446 dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1447
1448             // NOTE: zero-size buffer barriers are currently allowed
1449 if (handleType == RenderHandleType::GPU_BUFFER) {
1450 const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle);
1451 PLUGIN_ASSERT(gpuBuffer);
1452 if (gpuBuffer) {
1453 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1454 // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1455 const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1456 const VkDeviceSize size =
1457 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1458 (VkDeviceSize)ref.dst.optionalByteSize);
1459 bufferMemoryBarriers[bufferBarrierIdx++] = {
1460 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1461 nullptr, // pNext
1462 srcAccessMask, // srcAccessMask
1463 dstAccessMask, // dstAccessMask
1464 srcQueueFamilyIndex, // srcQueueFamilyIndex
1465 dstQueueFamilyIndex, // dstQueueFamilyIndex
1466 platBuffer.buffer, // buffer
1467 offset, // offset
1468 size, // size
1469 };
1470 }
1471 } else if (handleType == RenderHandleType::GPU_IMAGE) {
1472 const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle);
1473 PLUGIN_ASSERT(gpuImage);
1474 if (gpuImage) {
1475 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1476
1477 const VkImageLayout srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1478 const VkImageLayout dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1479
1480 const VkImageAspectFlags imageAspectFlags =
1481 (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1482 ? platImage.aspectFlags
1483 : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1484
1485 const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1486 PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1487 ? platImage.mipLevels
1488 : ref.src.optionalImageSubresourceRange.levelCount;
1489 PLUGIN_ASSERT(levelCount <= platImage.mipLevels);
1490
1491 const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1492 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1493 ? platImage.arrayLayers
1494 : ref.src.optionalImageSubresourceRange.layerCount;
1495 PLUGIN_ASSERT(layerCount <= platImage.arrayLayers);
1496
1497 const VkImageSubresourceRange imageSubresourceRange {
1498 imageAspectFlags, // aspectMask
1499 ref.src.optionalImageSubresourceRange.baseMipLevel, // baseMipLevel
1500 levelCount, // levelCount
1501 ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1502 layerCount, // layerCount
1503 };
1504
1505 imageMemoryBarriers[imageBarrierIdx++] = {
1506 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1507 nullptr, // pNext
1508 srcAccessMask, // srcAccessMask
1509 dstAccessMask, // dstAccessMask
1510 srcImageLayout, // oldLayout
1511 dstImageLayout, // newLayout
1512 srcQueueFamilyIndex, // srcQueueFamilyIndex
1513 dstQueueFamilyIndex, // dstQueueFamilyIndex
1514 platImage.image, // image
1515 imageSubresourceRange, // subresourceRange
1516 };
1517 }
1518 } else {
1519 memoryBarriers[memoryBarrierIdx++] = {
1520 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1521 nullptr, // pNext
1522 srcAccessMask, // srcAccessMask
1523 dstAccessMask, // dstAccessMask
1524 };
1525 }
1526
1527 const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1528 const bool resetBarriers = ((bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1529 (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1)))
1530 ? true
1531 : false;
1532
1533 if (hasBarriers && resetBarriers) {
1534 #if (RENDER_VALIDATION_ENABLED == 1)
1535 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1536 #endif
1537 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1538 srcPipelineStageMask, // srcStageMask
1539 dstPipelineStageMask, // dstStageMask
1540 dependencyFlags, // dependencyFlags
1541 memoryBarrierIdx, // memoryBarrierCount
1542 memoryBarriers, // pMemoryBarriers
1543 bufferBarrierIdx, // bufferMemoryBarrierCount
1544 bufferMemoryBarriers, // pBufferMemoryBarriers
1545 imageBarrierIdx, // imageMemoryBarrierCount
1546 imageMemoryBarriers); // pImageMemoryBarriers
1547
1548 bufferBarrierIdx = 0;
1549 imageBarrierIdx = 0;
1550 memoryBarrierIdx = 0;
1551 }
1552 }
1553 }
1554 #if (RENDER_VALIDATION_ENABLED == 1)
1555 if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1556 PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1557 "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1558 barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1559 }
1560 #endif
1561 }
1562
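// Updates the Vulkan descriptor sets referenced by the render command. For every valid handle the CPU-side
// binding data is converted into VkWriteDescriptorSet entries (buffers, images, samplers, and acceleration
// structures when ray tracing is enabled) and written with one vkUpdateDescriptorSets call per set.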
1563 void RenderBackendVk::RenderCommand(const RenderCommandUpdateDescriptorSets& renderCmd,
1564 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1565 const StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
1566 {
1567     // NOTE: could be changed to update all descriptor sets with a single vkUpdateDescriptorSets call
1568 NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)ncdsm;
1569
1570 for (uint32_t descIdx = 0; descIdx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++descIdx) {
1571 const RenderHandle descHandle = renderCmd.descriptorSetHandles[descIdx];
1572 if (RenderHandleUtil::GetHandleType(descHandle) != RenderHandleType::DESCRIPTOR_SET) {
1573 continue;
1574 }
1575
1576 // first update gpu descriptor indices
1577 ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
1578
1579 // actual vulkan descriptor set update
1580 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descHandle);
1581
1582 if (descriptorSet) {
1583 const DescriptorSetLayoutBindingResources bindingResources = ncdsm.GetCpuDescriptorSetData(descHandle);
1584 #if (RENDER_VALIDATION_ENABLED == 1)
1585 // get descriptor counts
1586 const LowLevelDescriptorCountsVk& descriptorCounts = aNcdsmVk.GetLowLevelDescriptorCounts(descHandle);
1587 if ((uint32_t)bindingResources.bindings.size() > descriptorCounts.writeDescriptorCount) {
1588 PLUGIN_LOG_E("RENDER_VALIDATION: update descriptor set bindings exceed descriptor set bindings");
1589 }
1590 #endif
1591 const uint32_t bindingCount = Math::min(
1592 (uint32_t)bindingResources.bindings.size(), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT);
1593 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
1594
1595 // max counts
1596 VkDescriptorBufferInfo descriptorBufferInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
1597 VkDescriptorImageInfo descriptorImageInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
1598 VkDescriptorImageInfo descriptorSamplerInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
1599 #if (RENDER_VULKAN_RT_ENABLED == 1)
1600 VkWriteDescriptorSetAccelerationStructureKHR
1601 descriptorAccelInfos[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
1602 uint32_t accelIndex = 0;
1603 #endif
1604 VkWriteDescriptorSet writeDescriptorSet[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
1605
1606 const auto& buffers = bindingResources.buffers;
1607 const auto& images = bindingResources.images;
1608 const auto& samplers = bindingResources.samplers;
1609 uint32_t bufferIndex = 0;
1610 uint32_t imageIndex = 0;
1611 uint32_t samplerIndex = 0;
1612 uint32_t writeBindIdx = 0;
1613 for (const auto& ref : buffers) {
1614 const uint32_t descriptorCount = ref.binding.descriptorCount;
1615                 // skip array bindings that are bound from the first index; they also have descriptorCount 0
1616 if (descriptorCount == 0) {
1617 continue;
1618 }
1619 const uint32_t arrayOffset = ref.arrayOffset;
1620 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1621 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1622 #if (RENDER_VULKAN_RT_ENABLED == 1)
1623 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1624                         // index 0 uses the binding's own resource; from index 1 onwards the array offsets are used
1625 const BindableBuffer& bRes =
1626 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1627 const GpuAccelerationStructureVk* accelPtr =
1628 gpuResourceMgr_.GetAccelerationStructure<GpuAccelerationStructureVk>(bRes.handle);
1629 if (accelPtr) {
1630 const GpuAccelerationStructurePlatformDataVk& platAccel = accelPtr->GetPlatformData();
1631 descriptorAccelInfos[accelIndex + idx] = {
1632 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1633 nullptr, // pNext
1634 descriptorCount, // accelerationStructureCount
1635 &platAccel.accelerationStructure, // pAccelerationStructures
1636 };
1637 }
1638 }
1639 writeDescriptorSet[writeBindIdx++] = {
1640 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1641 &descriptorAccelInfos[accelIndex], // pNext
1642 descriptorSet->descriptorSet, // dstSet
1643 ref.binding.binding, // dstBinding
1644 0, // dstArrayElement
1645 descriptorCount, // descriptorCount
1646 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1647 nullptr, // pImageInfo
1648 nullptr, // pBufferInfo
1649 nullptr, // pTexelBufferView
1650 };
1651 accelIndex += descriptorCount;
1652 #endif
1653 } else {
1654 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1655                         // index 0 uses the binding's own resource; from index 1 onwards the array offsets are used
1656 const BindableBuffer& bRes =
1657 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1658 const VkDeviceSize optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1659 const GpuBufferVk* bufferPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle);
1660 if (bufferPtr) {
1661 const GpuBufferPlatformDataVk& platBuffer = bufferPtr->GetPlatformData();
1662 // takes into account dynamic ring buffers with mapping
1663 const VkDeviceSize bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1664 const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1665 const VkDeviceSize bufferRange =
1666 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1667 (VkDeviceSize)bRes.byteSize);
1668 descriptorBufferInfos[bufferIndex + idx] = {
1669 platBuffer.buffer, // buffer
1670 byteOffset, // offset
1671 bufferRange, // range
1672 };
1673 }
1674 }
1675 writeDescriptorSet[writeBindIdx++] = {
1676 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1677 nullptr, // pNext
1678 descriptorSet->descriptorSet, // dstSet
1679 ref.binding.binding, // dstBinding
1680 0, // dstArrayElement
1681 descriptorCount, // descriptorCount
1682 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1683 nullptr, // pImageInfo
1684 &descriptorBufferInfos[bufferIndex], // pBufferInfo
1685 nullptr, // pTexelBufferView
1686 };
1687 bufferIndex += descriptorCount;
1688 }
1689 }
1690 for (const auto& ref : images) {
1691 const uint32_t descriptorCount = ref.binding.descriptorCount;
1692                 // skip array bindings that are bound from the first index; they also have descriptorCount 0
1693 if (descriptorCount == 0) {
1694 continue;
1695 }
1696 const VkDescriptorType descriptorType = (VkDescriptorType)ref.binding.descriptorType;
1697 const uint32_t arrayOffset = ref.arrayOffset;
1698 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
1699 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1700                     // index 0 uses the binding's own resource; from index 1 onwards the array offsets are used
1701 const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].resource;
1702 const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(bRes.handle);
1703 if (imagePtr) {
1704 VkSampler sampler = VK_NULL_HANDLE;
1705 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
1706 const GpuSamplerVk* samplerPtr =
1707 gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
1708 if (samplerPtr) {
1709 sampler = samplerPtr->GetPlatformData().sampler;
1710 }
1711 }
1712 const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
1713 const GpuImagePlatformDataViewsVk& platImageViews = imagePtr->GetPlatformDataViews();
1714 VkImageView imageView = platImage.imageView;
1715 if ((bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
1716 (bRes.mip < platImageViews.mipImageViews.size())) {
1717 imageView = platImageViews.mipImageViews[bRes.mip];
1718 } else if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
1719 (bRes.layer < platImageViews.layerImageViews.size())) {
1720 imageView = platImageViews.layerImageViews[bRes.layer];
1721 }
1722 descriptorImageInfos[imageIndex + idx] = {
1723 sampler, // sampler
1724 imageView, // imageView
1725 (VkImageLayout)bRes.imageLayout, // imageLayout
1726 };
1727 }
1728 }
1729 writeDescriptorSet[writeBindIdx++] = {
1730 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1731 nullptr, // pNext
1732 descriptorSet->descriptorSet, // dstSet
1733 ref.binding.binding, // dstBinding
1734 0, // dstArrayElement
1735 descriptorCount, // descriptorCount
1736 descriptorType, // descriptorType
1737 &descriptorImageInfos[imageIndex], // pImageInfo
1738 nullptr, // pBufferInfo
1739 nullptr, // pTexelBufferView
1740 };
1741 imageIndex += descriptorCount;
1742 }
1743 for (const auto& ref : samplers) {
1744 const uint32_t descriptorCount = ref.binding.descriptorCount;
1745                 // skip array bindings that are bound from the first index; they also have descriptorCount 0
1746 if (descriptorCount == 0) {
1747 continue;
1748 }
1749 const uint32_t arrayOffset = ref.arrayOffset;
1750 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
1751 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1752                     // index 0 uses the binding's own resource; from index 1 onwards the array offsets are used
1753 const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].resource;
1754 const GpuSamplerVk* samplerPtr = gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.handle);
1755 if (samplerPtr) {
1756 const GpuSamplerPlatformDataVk& platSampler = samplerPtr->GetPlatformData();
1757 descriptorSamplerInfos[samplerIndex + idx] = {
1758 platSampler.sampler, // sampler
1759 VK_NULL_HANDLE, // imageView
1760 VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
1761 };
1762 }
1763 }
1764 writeDescriptorSet[writeBindIdx++] = {
1765 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1766 nullptr, // pNext
1767 descriptorSet->descriptorSet, // dstSet
1768 ref.binding.binding, // dstBinding
1769 0, // dstArrayElement
1770 descriptorCount, // descriptorCount
1771 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1772 &descriptorSamplerInfos[samplerIndex], // pImageInfo
1773 nullptr, // pBufferInfo
1774 nullptr, // pTexelBufferView
1775 };
1776 samplerIndex += descriptorCount;
1777 }
1778 vkUpdateDescriptorSets(device, // device
1779 bindingCount, // descriptorWriteCount
1780 writeDescriptorSet, // pDescriptorWrites
1781 0, // descriptorCopyCount
1782 nullptr); // pDescriptorCopies
1783 #if (RENDER_PERF_ENABLED == 1)
1784 stateCache.perfCounters.updateDescriptorSetCount++;
1785 #endif
1786 }
1787 }
1788 }
1789
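// Binds the requested descriptor sets for the currently bound pipeline. Dynamic buffer offsets are gathered
// from the descriptor set manager, and if the immutable sampler hash of the sets no longer matches the cached
// pipeline state, the pipeline is bound again before the sets are re-bound.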
1790 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
1791 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1792 StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
1793 {
1794 const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
1795
1796 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
1797 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
1798 const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
1799 ? VK_PIPELINE_BIND_POINT_COMPUTE
1800 : VK_PIPELINE_BIND_POINT_GRAPHICS;
1801 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
1802
1803 const bool valid = (pipelineLayout != VK_NULL_HANDLE) ? true : false;
1804 PLUGIN_ASSERT(valid); // render command list should enforce this
1805 const uint32_t firstSet = renderCmd.firstSet;
1806 const uint32_t setCount = renderCmd.setCount;
1807 if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
1808 uint32_t combinedDynamicOffsetCount = 0;
1809 uint32_t dynamicOffsetDescriptorSetIndices = 0;
1810 uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
1811
1812 VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
1813 const uint32_t firstPlusCount = firstSet + setCount;
1814 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
1815 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
1816 if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
1817 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
1818 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
1819 combinedDynamicOffsetCount += dynamicDescriptorCount;
1820
1821 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
1822 if (descriptorSet) {
1823 PLUGIN_ASSERT(descriptorSet->descriptorSet);
1824 descriptorSets[idx] = descriptorSet->descriptorSet;
1825 // update, copy to state cache
1826 PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
1827 stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
1828 const uint32_t currShift = (idx * 16u);
1829 const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
1830 uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
1831 stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
1832 }
1833 }
1834 }
1835
1836 PLUGIN_ASSERT(combinedDynamicOffsetCount <= PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT);
1837 uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT];
1838 uint32_t dynamicOffsetIdx = 0;
1839 const uint32_t userDynamicOffsetCount = renderCmd.dynamicOffsetCount;
1840 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
1841 if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
1842 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
1843 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
1844 const size_t dodResCount = dod.resources.size();
1845 for (size_t dodIdx = 0; dodIdx < dodResCount; ++dodIdx) {
1846 #if (RENDER_VALIDATION_ENABLED == 1)
1847 const GpuBuffer* gpuBuffer = gpuResourceMgr_.GetBuffer(dod.resources[dodIdx]);
1848 PLUGIN_UNUSED(gpuBuffer);
1849 PLUGIN_ASSERT(gpuBuffer);
1850 #endif
1851 uint32_t byteOffset = 0;
1852 if (dynamicOffsetIdx < userDynamicOffsetCount) {
1853 byteOffset += renderCmd.dynamicOffsets[dynamicOffsetIdx];
1854 }
1855 dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
1856 }
1857 }
1858 }
1859
1860 if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
1861 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
1862 pipelineBindPoint, // pipelineBindPoint
1863 pipelineLayout, // layout
1864 firstSet, // firstSet
1865 setCount, // descriptorSetCount
1866 &descriptorSets[firstSet], // pDescriptorSets
1867 dynamicOffsetIdx, // dynamicOffsetCount
1868 dynamicOffsets); // pDynamicOffsets
1869 #if (RENDER_PERF_ENABLED == 1)
1870 stateCache.perfCounters.bindDescriptorSetCount++;
1871 #endif
1872 } else {
1873             // the pso may need to be re-created; re-bind the pipeline and then bind these sets to the new pso
1874 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
1875 (PipelineBindPoint)pipelineBindPoint };
1876 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
1877 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
1878 }
1879 }
1880 }
1881
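// Records a push constant update through the pipeline layout of the currently bound PSO.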
1882 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1883 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1884 {
1885 PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
1886 PLUGIN_ASSERT(renderCmd.data);
1887
1888 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
1889 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
1890
1891 const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0)) ? true : false;
1892 PLUGIN_ASSERT(valid);
1893
1894 if (valid) {
1895 const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
1896 vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
1897 pipelineLayout, // layout
1898 shaderStageFlags, // stageFlags
1899 0, // offset
1900 renderCmd.pushConstant.byteSize, // size
1901 static_cast<void*>(renderCmd.data)); // pValues
1902 }
1903 }
1904
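// Builds an acceleration structure from the triangle, AABB, and instance geometries in the render command.
// Buffer device addresses are resolved for each input and the build is recorded with
// vkCmdBuildAccelerationStructuresKHR when the extension function is available.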
1905 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
1906 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1907 const StateCache& stateCache)
1908 {
1909 #if (RENDER_VULKAN_RT_ENABLED == 1)
1910 // NOTE: missing
1911 const GpuAccelerationStructureVk* dst =
1912 gpuResourceMgr_.GetAccelerationStructure<const GpuAccelerationStructureVk>(renderCmd.dstAccelerationStructure);
1913 const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
1914 if (dst && scratchBuffer) {
1915 const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
1916 const VkDevice device = devicePlat.device;
1917
1918 const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformData();
1919 const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
1920
1921 // scratch data with user offset
1922 const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
1923 VkDeviceSize(renderCmd.scratchOffset) };
1924
1925 const size_t arraySize =
1926 renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
1927 vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
1928 vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
1929
1930 size_t arrayIndex = 0;
1931 for (const auto& trianglesRef : renderCmd.trianglesView) {
1932 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
1933 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
1934 nullptr, // pNext
1935 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
1936 {}, // geometry;
1937 0, // flags
1938 };
1939 uint32_t primitiveCount = 0;
1940 const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
1941 const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
1942 if (vb && ib) {
1943 const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
1944 device, vb->GetPlatformData().buffer) };
1945 const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
1946 device, ib->GetPlatformData().buffer) };
1947 VkDeviceOrHostAddressConstKHR transformData {};
1948 if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
1949 if (const GpuBufferVk* tr =
1950 gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
1951 tr) {
1952                         transformData.deviceAddress = { GetBufferDeviceAddress(device, tr->GetPlatformData().buffer) };
1953 }
1954 }
1955 primitiveCount = trianglesRef.info.indexCount / 3u; // triangles
1956
1957 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
1958 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
1959 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
1960 nullptr, // pNext
1961 VkFormat(trianglesRef.info.vertexFormat), // vertexFormat
1962 vertexData, // vertexData
1963 VkDeviceSize(trianglesRef.info.vertexStride), // vertexStride
1964 trianglesRef.info.maxVertex, // maxVertex
1965 VkIndexType(trianglesRef.info.indexType), // indexType
1966 indexData, // indexData
1967 transformData, // transformData
1968 };
1969 }
1970 buildRangeInfos[arrayIndex] = {
1971 primitiveCount, // primitiveCount
1972 0u, // primitiveOffset
1973 0u, // firstVertex
1974 0u, // transformOffset
1975 };
1976 arrayIndex++;
1977 }
1978 for (const auto& aabbsRef : renderCmd.aabbsView) {
1979 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
1980 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
1981 nullptr, // pNext
1982 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
1983 {}, // geometry;
1984 0, // flags
1985 };
1986 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
1987 const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle);
1988 if (iPtr) {
1989 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
1990 }
1991 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
1992 geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
1993 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
1994 nullptr, // pNext
1995 deviceAddress, // data
1996 aabbsRef.info.stride, // stride
1997 };
1998 buildRangeInfos[arrayIndex] = {
1999 1u, // primitiveCount
2000 0u, // primitiveOffset
2001 0u, // firstVertex
2002 0u, // transformOffset
2003 };
2004 arrayIndex++;
2005 }
2006 for (const auto& instancesRef : renderCmd.instancesView) {
2007 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2008 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2009 nullptr, // pNext
2010 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
2011 {}, // geometry;
2012 0, // flags
2013 };
2014 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2015 const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle);
2016 if (iPtr) {
2017 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2018 }
2019 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2020 geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2021 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2022 nullptr, // pNext
2023 instancesRef.info.arrayOfPointers, // arrayOfPointers
2024 deviceAddress, // data
2025 };
2026 buildRangeInfos[arrayIndex] = {
2027 1u, // primitiveCount
2028 0u, // primitiveOffset
2029 0u, // firstVertex
2030 0u, // transformOffset
2031 };
2032 arrayIndex++;
2033 }
2034
2035 const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2036 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2037 nullptr, // pNext
2038 VkAccelerationStructureTypeKHR(renderCmd.type), // type
2039 VkBuildAccelerationStructureFlagsKHR(renderCmd.flags), // flags
2040 VkBuildAccelerationStructureModeKHR(renderCmd.mode), // mode
2041 VK_NULL_HANDLE, // srcAccelerationStructure
2042 dstAs, // dstAccelerationStructure
2043 uint32_t(arrayIndex), // geometryCount
2044 geometryData.data(), // pGeometries
2045 nullptr, // ppGeometries
2046 scratchData, // scratchData
2047 };
2048
2049 vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2050 for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2051 buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2052 }
2053 const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2054 if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2055 extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2056 1u, // infoCount
2057 &buildGeometryInfo, // pInfos
2058 buildRangeInfosPtr.data()); // ppBuildRangeInfos
2059 }
2060 }
2061 #endif
2062 }
2063
2064 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
2065 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2066 const StateCache& stateCache)
2067 {
2068 const ViewportDesc& vd = renderCmd.viewportDesc;
2069
2070 const VkViewport viewport {
2071 vd.x, // x
2072 vd.y, // y
2073 vd.width, // width
2074 vd.height, // height
2075 vd.minDepth, // minDepth
2076 vd.maxDepth, // maxDepth
2077 };
2078
2079 vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
2080 0, // firstViewport
2081 1, // viewportCount
2082 &viewport); // pViewports
2083 }
2084
2085 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
2086 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2087 const StateCache& stateCache)
2088 {
2089 const ScissorDesc& sd = renderCmd.scissorDesc;
2090
2091 const VkRect2D scissor {
2092 { sd.offsetX, sd.offsetY }, // offset
2093 { sd.extentWidth, sd.extentHeight }, // extent
2094 };
2095
2096 vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
2097 0, // firstScissor
2098 1, // scissorCount
2099 &scissor); // pScissors
2100 }
2101
2102 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
2103 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2104 const StateCache& stateCache)
2105 {
2106 vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
2107 renderCmd.lineWidth); // lineWidth
2108 }
2109
2110 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
2111 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2112 const StateCache& stateCache)
2113 {
2114 vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
2115 renderCmd.depthBiasConstantFactor, // depthBiasConstantFactor
2116 renderCmd.depthBiasClamp, // depthBiasClamp
2117 renderCmd.depthBiasSlopeFactor); // depthBiasSlopeFactor
2118 }
2119
2120 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
2121 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2122 const StateCache& stateCache)
2123 {
2124 vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
2125 renderCmd.blendConstants); // blendConstants[4]
2126 }
2127
2128 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
2129 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2130 const StateCache& stateCache)
2131 {
2132 vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
2133 renderCmd.minDepthBounds, // minDepthBounds
2134 renderCmd.maxDepthBounds); // maxDepthBounds
2135 }
2136
2137 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
2138 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2139 const StateCache& stateCache)
2140 {
2141 const VkStencilFaceFlags stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;
2142
2143 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2144 vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
2145 stencilFaceMask, // faceMask
2146 renderCmd.mask); // compareMask
2147 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2148 vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
2149 stencilFaceMask, // faceMask
2150 renderCmd.mask); // writeMask
2151 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2152 vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
2153 stencilFaceMask, // faceMask
2154 renderCmd.mask); // reference
2155 }
2156 }
2157
2158 void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
2159 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2160 const StateCache& stateCache)
2161 {
2162 if (stateCache.backendNode) {
2163 const RenderBackendRecordingStateVk recordingState = {
2164 {},
2165 cmdBuf.commandBuffer, // commandBuffer
2166 stateCache.lowLevelRenderPassData.renderPass, // renderPass
2167 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
2168 stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2169 stateCache.lowLevelRenderPassData.subpassIndex, // subpassIndex
2170 stateCache.pipelineLayout, // pipelineLayout
2171 };
2172 const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2173 stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
2174 }
2175 }
2176
2177 void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2178 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2179 {
2180 PLUGIN_ASSERT_MSG(false, "not implemented");
2181
2182 const VkPipelineStageFlagBits pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
2183 const uint32_t queryIndex = renderCmd.queryIndex;
2184 VkQueryPool queryPool = VK_NULL_HANDLE;
2185
2186 vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2187 queryPool, // queryPool
2188 queryIndex, // firstQuery
2189 1); // queryCount
2190
2191 vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
2192 pipelineStageFlagBits, // pipelineStage
2193 queryPool, // queryPool
2194 queryIndex); // query
2195 }
2196
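// Transitions the swapchain image to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR using the access and stage flags that
// the render graph recorded for its last use, so the image can be handed to the presentation engine.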
2197 void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf)
2198 {
2199 PLUGIN_ASSERT(presentationInfo_.presentationLayoutChangeNeeded);
2200 PLUGIN_ASSERT(presentationInfo_.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR);
2201
2202 const GpuResourceState& state = presentationInfo_.renderGraphProcessedState;
2203 const VkAccessFlags srcAccessMask = (VkAccessFlags)state.accessFlags;
2204 const VkAccessFlags dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
2205 const VkPipelineStageFlags srcStageMask =
2206 ((VkPipelineStageFlags)state.pipelineStageFlags) | (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
2207 const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
2208 const VkImageLayout oldLayout = (VkImageLayout)presentationInfo_.imageLayout;
2209 const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2210     // NOTE: the queue is not currently checked (should be the same queue the image was last used on)
2211 constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2212 constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2213 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
2214 constexpr VkImageSubresourceRange imageSubresourceRange {
2215 VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
2216 0, // baseMipLevel
2217 1, // levelCount
2218 0, // baseArrayLayer
2219 1, // layerCount
2220 };
2221
2222 const VkImageMemoryBarrier imageMemoryBarrier {
2223 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
2224 nullptr, // pNext
2225 srcAccessMask, // srcAccessMask
2226 dstAccessMask, // dstAccessMask
2227 oldLayout, // oldLayout
2228 newLayout, // newLayout
2229 srcQueueFamilyIndex, // srcQueueFamilyIndex
2230 dstQueueFamilyIndex, // dstQueueFamilyIndex
2231 presentationInfo_.swapchainImage, // image
2232 imageSubresourceRange, // subresourceRange
2233 };
2234
2235 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
2236 srcStageMask, // srcStageMask
2237 dstStageMask, // dstStageMask
2238 dependencyFlags, // dependencyFlags
2239 0, // memoryBarrierCount
2240 nullptr, // pMemoryBarriers
2241 0, // bufferMemoryBarrierCount
2242 nullptr, // pBufferMemoryBarriers
2243 1, // imageMemoryBarrierCount
2244 &imageMemoryBarrier); // pImageMemoryBarriers
2245
2246 presentationInfo_.presentationLayoutChangeNeeded = false;
2247 presentationInfo_.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2248 }
2249
2250 #if (RENDER_PERF_ENABLED == 1)
2251
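// Creates per-render-node CPU/GPU timers on first use. With GPU timestamp queries enabled, each node gets a
// timestamp query and an offset into a shared readback buffer that is cycled every frame.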
2252 void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
2253 {
2254 for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2255 const string_view& debugName = renderCommandContext.debugName;
2256 if (timers_.count(debugName) == 0) { // new timers
2257 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2258 PerfDataSet& perfDataSet = timers_[debugName];
2259 constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2260 perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
2261 constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
2262 perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
2263 #else
2264 timers_.insert({ debugName, {} });
2265 #endif
2266 }
2267 }
2268
2269 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2270 perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
2271 perfGpuTimerData_.currentOffset =
2272 (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
2273 #endif
2274 }
2275
2276 void RenderBackendVk::EndFrameTimers()
2277 {
2278 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2279 perfGpuTimerData_.gpuBuffer->Unmap();
2280 #endif
2281 if (IPerformanceDataManagerFactory* globalPerfData =
2282 GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2283 globalPerfData) {
2284 IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
2285 perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2286 perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2287 perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2288 perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2289 perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2290 }
2291 }
2292
2293 void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
2294 const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits)
2295 {
2296 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2297 PLUGIN_ASSERT(timers_.count(name) == 1);
2298 const PerfDataSet* perfDataSet = &timers_[name];
2299
2300 const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
2301 PLUGIN_ASSERT(gpuQuery);
2302
2303 const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2304 PLUGIN_ASSERT(platData.queryPool);
2305
2306 vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2307 platData.queryPool, // queryPool
2308 queryIndex, // firstQuery
2309 1); // queryCount
2310
2311 vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer,
2312 stageFlagBits, // pipelineStage,
2313 platData.queryPool, // queryPool,
2314 queryIndex); // query
2315 #endif
2316 }
2317
2318 namespace {
2319 void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
2320 {
2321 perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
2322 perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
2323 perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
2324 perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
2325 perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
2326 perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
2327 perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
2328 perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
2329 perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
2330 perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
2331 }
2332 } // namespace
2333
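// Reads the GPU timestamps written for this node on an earlier frame from the mapped readback buffer,
// converts them to microseconds using the device timestamp period, reports the CPU/GPU times and per-node
// counters, and copies the current frame's query results into the readback buffer for a later frame.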
2334 void RenderBackendVk::CopyPerfTimeStamp(
2335 const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& cache)
2336 {
2337 PLUGIN_ASSERT(timers_.count(name) == 1);
2338 const PerfDataSet* perfDataSet = &timers_[name];
2339
2340 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2341     // read back the results of queries issued on an earlier frame on the CPU
2342     // and copy the current query results from the query pool into the gpu buffer
2343 const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
2344 int64_t gpuMicroSeconds = 0;
2345 {
2346 auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
2347 auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);
2348
2349 const uint64_t startStamp = *currentData;
2350 const uint64_t endStamp = *(currentData + 1);
2351
2352 const double timestampPeriod =
2353 static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
2354 .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
2355 constexpr int64_t nanosToMicrosDivisor { 1000 };
2356 gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
2357 constexpr int64_t maxValidMicroSecondValue { 4294967295 };
2358 if (gpuMicroSeconds > maxValidMicroSecondValue) {
2359 gpuMicroSeconds = 0;
2360 }
2361 }
2362 #endif
2363 const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();
2364
2365 if (IPerformanceDataManagerFactory* globalPerfData =
2366 GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2367 globalPerfData) {
2368 IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2369
2370 perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2371 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2372 perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2373
2374 const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
2375 PLUGIN_ASSERT(gpuQuery);
2376
2377 const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2378
2379 const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
2380 PLUGIN_ASSERT(gpuBuffer);
2381 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
2382
2383 constexpr uint32_t queryCount = 2;
2384 constexpr VkDeviceSize queryStride = sizeof(uint64_t);
2385 constexpr VkQueryResultFlags queryResultFlags =
2386 VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;
2387
2388 vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
2389 platData.queryPool, // queryPool
2390 0, // firstQuery
2391 queryCount, // queryCount
2392 platBuffer.buffer, // dstBuffer
2393 currentFrameByteOffset, // dstOffset
2394 queryStride, // stride
2395 queryResultFlags); // flags
2396 #endif
2397 UpdatePerfCounters(*perfData, name, cache.perfCounters);
2398 }
2399 }
2400
2401 #endif
2402 RENDER_END_NAMESPACE()
2403