/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_command_list.h"

#include <cinttypes>
#include <cstdint>

#include <base/containers/array_view.h>
#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_command_list.h>
#include <render/render_data_structures.h>

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "util/linear_allocator.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
PLUGIN_STATIC_ASSERT(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT == 4);
PLUGIN_STATIC_ASSERT(PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT == 8u);
namespace {
#if (RENDER_VALIDATION_ENABLED == 1)
void ValidateImageUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const ImageUsageFlags imageUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetImageDescriptor(handle).usageFlags & imageUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateImageUsageFlags_",
            "RENDER_VALIDATION: gpu image (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateBufferUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const BufferUsageFlags bufferUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetBufferDescriptor(handle).usageFlags & bufferUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateBufferUsageFlags_",
            "RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateDescriptorTypeBinding(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const DescriptorSetLayoutBindingResources& bindingRes)
{
    for (const auto& ref : bindingRes.buffers) {
        if (!RenderHandleUtil::IsGpuBuffer(ref.resource.handle)) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid GPU buffer");
        }
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
            // no buffer usage flags to validate for acceleration structures
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.images) {
        if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
            (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT,
                "CORE_IMAGE_USAGE_SAMPLED_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT,
                "CORE_IMAGE_USAGE_STORAGE_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.samplers) {
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
            // plain samplers have no usage flags to validate
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
        }
    }
}

void ValidateRenderPassAttachment(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
    const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[0]);
    const uint32_t baseWidth = baseDesc.width;
    const uint32_t baseHeight = baseDesc.height;
    // NOTE: we do not check fragment shading rate attachment size
    for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDesc.attachmentCount; ++attachmentIdx) {
        const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[attachmentIdx]);
        if (desc.width != baseWidth || desc.height != baseHeight) {
            for (const auto& subpassRef : subpassDescs) {
                auto CheckAttachments = [](const auto& indices, const uint32_t count, const uint32_t attachmentIndex) {
                    for (uint32_t idx = 0; idx < count; ++idx) {
                        if (indices[idx] == attachmentIndex) {
                            return false;
                        }
                    }
                    return true;
                };
                bool valid = true;
                valid &=
                    CheckAttachments(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount, attachmentIdx);
                valid &=
                    CheckAttachments(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount, attachmentIdx);
                valid &= CheckAttachments(
                    subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount, attachmentIdx);
                if ((subpassRef.depthAttachmentIndex == attachmentIdx) ||
                    (subpassRef.depthResolveAttachmentIndex == attachmentIdx)) {
                    valid = false;
                }
                if (!valid) {
                    if (RenderHandleUtil::IsSwapchain(renderPassDesc.attachmentHandles[attachmentIdx]) &&
                        RenderHandleUtil::IsDepthImage(renderPassDesc.attachmentHandles[0])) {
                        PLUGIN_LOG_ONCE_W(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: Depth and swapchain input mismatch: baseWidth:%u baseHeight:%u "
                            "currWidth:%u currHeight:%u",
                            baseWidth, baseHeight, desc.width, desc.height);
                    } else {
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: render pass attachment size does not match at attachment index: %u",
                            attachmentIdx);
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
                            baseHeight, desc.width, desc.height);
                    }
                }
            }
        }
    }
    if ((renderPassDesc.renderArea.extentWidth == 0) || (renderPassDesc.renderArea.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaExtent_",
            "RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u) (node: %s)",
            renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight, nodeName.data());
    }
    if ((renderPassDesc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
        (renderPassDesc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaOffset_",
            "RENDER_VALIDATION: render area offset cannot go out of screen (offsetX: %i, offsetY: %i) (baseWidth: "
            "%u, baseHeight: %u) (node: %s)",
            renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY, baseWidth, baseHeight,
            nodeName.data());
    }
}

void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageSubresourceRange& imageSubresourceRange)
{
    const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
    if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseMipLevel: %u, is greater or equal to mipCount: %u",
            imageSubresourceRange.baseMipLevel, desc.mipCount);
    }
    if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseArrayLayer: %u, is greater or equal to "
                     "layerCount: %u",
            imageSubresourceRange.baseArrayLayer, desc.layerCount);
    }
}

void ValidateViewport(const string_view nodeName, const ViewportDesc& vd)
{
    if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateViewport_",
            "RENDER_VALIDATION: viewport width (%f) and height (%f) must be one or larger (node: %s)", vd.width,
            vd.height, nodeName.data());
    }
}

void ValidateScissor(const string_view nodeName, const ScissorDesc& sd)
{
    if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateScissor_",
            "RENDER_VALIDATION: scissor extentWidth (%u) and scissor extentHeight (%u) cannot be zero (node: %s)",
            sd.extentWidth, sd.extentHeight, nodeName.data());
    }
}

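// Summary of the checks below: valid shading rate sizes per axis are the power-of-two
// values 1, 2 and 4; e.g. Size2D{ 2u, 4u } passes while Size2D{ 3u, 1u } logs a warning.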
void ValidateFragmentShadingRate(const Size2D& size)
{
    bool valid = true;
    if ((size.width == 0) || (size.height == 0)) {
        valid = false;
    } else if ((size.width == 3u) || (size.height == 3u)) {
        valid = false;
    } else if ((size.width > 4u) || (size.height > 4u)) {
        valid = false;
    }
    if (!valid) {
        PLUGIN_LOG_W("RENDER_VALIDATION: fragmentSize must be less than or equal to 4 and the value must be a "
                     "power of two (width = %u, height = %u)",
            size.width, size.height);
    }
}
#endif // RENDER_VALIDATION_ENABLED

constexpr uint32_t INVALID_CL_IDX { ~0u };

constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barrier points (one of each) added for multi-queue gpu queue transfers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };

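// Rounds byteSize up to the next multiple of alignment. The add-and-mask form assumes
// alignment is a power of two; e.g. GetAlignedBytesize(100, 64) == (100 + 63) & ~63 == 128.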
size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

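// Allocates from the newest linear allocator; when it is exhausted a new allocator is
// appended whose capacity is at least MIN_ALLOCATION_SIZE, at least the aligned request,
// and at least twice the previous allocator's current byte size, so the number of blocks
// per frame stays small.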
void* AllocateRenderData(
    RenderCommandList::LinearAllocatorStruct& allocator, const size_t alignment, const size_t byteSz)
{
    PLUGIN_ASSERT(byteSz > 0);
    void* rc = nullptr;
    if (!allocator.allocators.empty()) {
        const size_t currentIndex = allocator.allocators.size() - 1;
        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
    }

    if (rc == nullptr) { // current allocator is out of memory
        size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
        const size_t currentIndex = allocator.allocators.size();
        if (currentIndex > 0) {
            allocatorByteSize =
                Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
        }
        allocator.allocators.push_back(make_unique<LinearAllocator>(allocatorByteSize));

        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
        if (rc == nullptr) {
            PLUGIN_LOG_E("RenderCommandList: render command list allocation: out of memory");
            PLUGIN_ASSERT(false);
        }
    }
    return rc;
}

template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T) * count));
}

template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T)));
}
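
// Typical recording pattern built on these helpers (a sketch of the flow used throughout
// this file, not additional API): allocate a typed command from frame-linear memory, fill
// it, then record a pointer to it, e.g.
//     auto* cmd = AllocateRenderCommand<RenderCommandDraw>(allocator_);
//     if (cmd) { /* fill fields */ renderCommands_.push_back({ RenderCommandType::DRAW, cmd }); }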
} // namespace

RenderCommandList::RenderCommandList(const BASE_NS::string_view nodeName,
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr, const GpuResourceManager& gpuResourceMgr,
    const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue, const bool enableMultiQueue)
    : IRenderCommandList(), nodeName_(nodeName),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}

void RenderCommandList::BeginFrame()
{
    if (allocator_.allocators.size() == 1) { // size is good for this frame
        allocator_.allocators[0]->Reset();
    } else if (allocator_.allocators.size() > 1) {
        size_t fullByteSize = 0;
        size_t alignment = 0;
        for (auto& ref : allocator_.allocators) {
            fullByteSize += ref->GetCurrentByteSize();
            alignment = Math::max(alignment, (size_t)ref->GetAlignment());
            ref.reset();
        }
        allocator_.allocators.clear();

        // add some room for current frame allocation for new render commands
        const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
        fullByteSize += extraBytes;

        // create a new single allocation for the combined previous size plus some extra bytes
        const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
        allocator_.allocators.push_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
    }

    ResetStateData();

    const auto clearAndReserve = [](auto& vec) {
        const size_t count = vec.size();
        vec.clear();
        vec.reserve(count);
    };

    clearAndReserve(renderCommands_);
    clearAndReserve(customBarriers_);
    clearAndReserve(rpVertexInputBufferBarriers_);
    clearAndReserve(rpIndirectBufferBarriers_);
    clearAndReserve(descriptorSetHandlesForBarriers_);
    clearAndReserve(descriptorSetHandlesForUpdates_);

    validReleaseAcquire_ = false;
    hasMultiRpCommandListSubpasses_ = false;
    multiRpCommandListData_ = {};
    hasGlobalDescriptorSetBindings_ = false;
}

void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
{
    if (enableMultiQueue_) {
        validReleaseAcquire_ = true;
    }
}

void RenderCommandList::BeforeRenderNodeExecuteFrame()
{
    // add possible barrier point for gpu queue transfer acquire
    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
    }
}

void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarriers() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    for (uint32_t idx = debugMarkerStack_.stackCounter; idx > 0U; --idx) {
        EndDebugMarker();
    }
#endif
}

array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
{
    if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_GetRenderCommands_",
            "RenderCommandList: invalid state data in render command list (node: %s)", nodeName_.c_str());
#endif
        return {};
    } else {
        return { renderCommands_.data(), renderCommands_.size() };
    }
}

bool RenderCommandList::HasValidRenderCommands() const
{
    const uint32_t renderCommandCount = GetRenderCommandCount();
    bool valid = false;
    if (enableMultiQueue_) {
        if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
            // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
            valid = validReleaseAcquire_;
        } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
            valid = true;
        }
    } else {
        valid = (renderCommandCount > 0);
    }
    valid = valid && stateData_.validCommandList;

    return valid;
}

uint32_t RenderCommandList::GetRenderCommandCount() const
{
    return (uint32_t)renderCommands_.size();
}

GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}

bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRpCommandListSubpasses_;
}

MultiRenderPassCommandListData RenderCommandList::GetMultiRenderCommandListData() const
{
    return multiRpCommandListData_;
}

bool RenderCommandList::HasGlobalDescriptorSetBindings() const
{
    return hasGlobalDescriptorSetBindings_;
}

array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
{
    return { customBarriers_.data(), customBarriers_.size() };
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassVertexInputBufferBarriers() const
{
    return { rpVertexInputBufferBarriers_.data(), rpVertexInputBufferBarriers_.size() };
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassIndirectBufferBarriers() const
{
    return { rpIndirectBufferBarriers_.data(), rpIndirectBufferBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
{
    return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetUpdateDescriptorSetHandles() const
{
    return { descriptorSetHandlesForUpdates_.data(), descriptorSetHandlesForUpdates_.size() };
}

void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    auto* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (!data) {
        return; // early out
    }
    *data = {}; // zero initialize

    data->renderCommandType = renderCommandType;
    data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

    // update new index (within render pass there might not be any dirty descriptor sets at this stage)
    const auto descriptorSetBeginIndex = static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size());
    data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
    data->descriptorSetHandleCount = 0U;
    // update new index (only valid with render pass)
    data->vertexIndexBarrierIndexBegin = static_cast<uint32_t>(rpVertexInputBufferBarriers_.size());
    data->vertexIndexBarrierCount = 0U;
    // update new index (only valid with render pass)
    data->indirectBufferBarrierIndexBegin = static_cast<uint32_t>(rpIndirectBufferBarriers_.size());
    data->indirectBufferBarrierCount = 0U;

    // barriers are always needed e.g. when a dynamic resource is bound for writing in multiple dispatches
    const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                      renderCommandType == RenderCommandType::DISPATCH ||
                                      renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
    if (handleDescriptorSets) {
        stateData_.dirtyDescriptorSetsForBarriers = false;
        for (auto& currentBoundSet : stateData_.currentBoundSets) {
            // only add descriptor set handles for barriers if there are dynamic barrier resources
            if (currentBoundSet.hasDynamicBarrierResources) {
                descriptorSetHandlesForBarriers_.push_back(currentBoundSet.descriptorSetHandle);
            }
        }
        data->descriptorSetHandleCount = (uint32_t)descriptorSetHandlesForBarriers_.size() - descriptorSetBeginIndex;
    }

    const bool handleCustomBarriers =
        ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
    if (handleCustomBarriers) {
        const int32_t newCount = (int32_t)customBarriers_.size() - stateData_.currentCustomBarrierIndices.prevSize;
        if (newCount > 0) {
            data->customBarrierIndexBegin = (uint32_t)stateData_.currentCustomBarrierIndices.prevSize;
            data->customBarrierCount = (uint32_t)newCount;

            stateData_.currentCustomBarrierIndices.prevSize = (int32_t)customBarriers_.size();
            stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
        }
    }

    // store the current barrier point for the render command list:
    // * binding descriptor sets (with dynamic barrier resources)
    // * binding vertex and index buffers (with dynamic barrier resources)
    // * indirect args buffer (with dynamic barrier resources)
    // inside a render pass adds barriers directly to the RenderCommandBarrierPoint behind this pointer
    stateData_.currentBarrierPoint = data;

    renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
}

void RenderCommandList::Draw(
    const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_Draw_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw)");
    }
#endif

    if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW;
            data->vertexCount = vertexCount;
            data->instanceCount = instanceCount;
            data->firstVertex = firstVertex;
            data->firstInstance = firstInstance;
            data->indexCount = 0;
            data->firstIndex = 0;
            data->vertexOffset = 0;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
    const int32_t vertexOffset, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DrawIndexed_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw).");
    }
#endif

    if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED;
            data->vertexCount = 0;
            data->instanceCount = instanceCount;
            data->firstVertex = 0;
            data->firstInstance = firstInstance;
            data->indexCount = indexCount;
            data->firstIndex = firstIndex;
            data->vertexOffset = vertexOffset;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DI_buffer_", "RENDER_VALIDATION: DrawIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
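                // NOTE: 16 bytes corresponds to a standard 4 x uint32 indirect draw argument
                // record (vertexCount, instanceCount, firstVertex, firstInstance), i.e. the
                // VkDrawIndirectCommand layout in Vulkan terms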
                constexpr uint32_t drawIndirectCommandSize { 4U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::DrawIndexedIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(
            nodeName_ + "_RCL_DII_buffer_", "RENDER_VALIDATION: DrawIndexedIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
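                // NOTE: 20 bytes corresponds to a standard 5-field indexed indirect draw
                // argument record (indexCount, instanceCount, firstIndex, vertexOffset,
                // firstInstance), i.e. the VkDrawIndexedIndirectCommand layout in Vulkan terms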
                constexpr uint32_t drawIndirectCommandSize { 5U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
{
    if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
        ValidatePipeline();
        ValidatePipelineLayout();

        AddBarrierPoint(RenderCommandType::DISPATCH);

        auto* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
        if (data) {
            data->groupCountX = groupCountX;
            data->groupCountY = groupCountY;
            data->groupCountZ = groupCountZ;

            renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
        }
    }
}

void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
{
    ValidatePipeline();
    ValidatePipelineLayout();

    AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);

    auto* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
    if (data) {
        data->argsHandle = bufferHandle;
        data->offset = offset;

        renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
    }
}

void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    // NOTE: we cannot early out with the same pso handle:
    // the render pass and its hashes might have changed, and the final pso
    // needs to be hashed with the final render pass.
    // The backends try to check re-binding of the same pipeline.
    // Another approach would be to re-bind psos, if needed, when the render pass changes.

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_BindPipeline_",
                "RENDER_VALIDATION: RenderCommandList: bind graphics pipeline after render pass begin.");
        }
    }
#endif

    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    auto* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}

void RenderCommandList::PushConstantData(
    const RENDER_NS::PushConstant& pushConstant, const BASE_NS::array_view<const uint8_t> data)
{
    ValidatePipeline();

    // the push constant is not used/allocated if its byte size exceeds the supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && (!data.empty())) {
        auto* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        // use the alignment of uint32 as the push constants are currently uint32s;
        // the data is allocated by shader/pipeline needs
        uint8_t* pushData =
            static_cast<uint8_t*>(AllocateRenderData(allocator_, std::alignment_of<uint32_t>(), pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            // at most the visible amount of data is copied
            const size_t minData = Math::min(static_cast<size_t>(pushConstant.byteSize), data.size_bytes());
            CloneData(rc->data, pushConstant.byteSize, data.data(), minData);

            renderCommands_.push_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be smaller than or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}

void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    if ((pushConstant.byteSize > 0) && data) {
        PushConstantData(pushConstant, { data, pushConstant.byteSize });
    }
}

void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION: max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (vertexBuffers.empty()) {
        return; // early out
    }
    auto* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
    if (!data) {
        return; // early out
    }

    VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    uint32_t dynamicBarrierVertexBufferCount = 0;
    const uint32_t vertexBufferCount =
        Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, (uint32_t)vertexBuffers.size());
    data->vertexBufferCount = vertexBufferCount;
    RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
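    // NOTE: consecutive bindings that reference the same dynamic buffer are collapsed below
    // into a single whole-buffer barrier entry (e.g. position/normal/uv streams sub-allocated
    // from one GPU buffer produce one entry instead of three)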
805     for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
806         data->vertexBuffers[idx] = vertexBuffers[idx];
807         const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
808         if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
809             (vertexBuffers[idx].byteSize > 0)) {
810             // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
811             dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
812                 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
813             previousVbHandle = currVbHandle;
814         }
815     }
816 
817     // add possible vertex/index buffer barriers before render pass
818     if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
819         PLUGIN_ASSERT(stateData_.currentBarrierPoint);
820         stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
821         const size_t currCount = rpVertexInputBufferBarriers_.size();
822         rpVertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
823         for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
824             rpVertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
825         }
826     }
827 
828     renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
829 }
830 
BindIndexBuffer(const IndexBuffer & indexBuffer)831 void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
832 {
833     ValidatePipeline();
834 
835     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
836 #if (RENDER_VALIDATION_ENABLED == 1)
837     if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
838         PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
839     }
840 #endif
841 
842     auto* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
843     if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
844         data->indexBuffer = indexBuffer;
845         if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
846             stateData_.currentBarrierPoint->vertexIndexBarrierCount++;
847             rpVertexInputBufferBarriers_.push_back(
848                 { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
849         }
850         renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
851     }
852 }
853 
BeginRenderPass(const RenderPassDesc & renderPassDesc,const array_view<const RenderPassSubpassDesc> subpassDescs)854 void RenderCommandList::BeginRenderPass(
855     const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
856 {
857 #if (RENDER_VALIDATION_ENABLED == 1)
858     if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
859         PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
860     }
861 #endif
862     if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
863 #if (RENDER_VALIDATION_ENABLED == 1)
864         PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_subpass_",
865             "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
866             renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
867 #endif
868         stateData_.validCommandList = false;
869     }
870     ValidateRenderPass(renderPassDesc);
871     if (!stateData_.validCommandList) {
872         return;
873     }
874 
875     stateData_.renderPassHasBegun = true;
876     stateData_.renderPassStartIndex = 0;
877     stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;
878 
879     if (renderPassDesc.attachmentCount == 0) {
880         return;
881     }
882 #if (RENDER_VALIDATION_ENABLED == 1)
883     ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, subpassDescs);
884 #endif
885     AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);
886 
887     if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
888         // NOTE: hashed in the backend
889         PLUGIN_ASSERT(renderPassDesc.subpassCount == (uint32_t)subpassDescs.size());
890 
891         data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
892         data->renderPassDesc = renderPassDesc;
893         data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
894         data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
895         data->subpassStartIndex = 0;
896         // if false -> initial layout is undefined
897         data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;
898 
899         data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
900             renderPassDesc.subpassCount };
901         data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
902                                             allocator_, renderPassDesc.subpassCount),
903             renderPassDesc.subpassCount };
904         if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
905             return;
906         }
907 
908         CloneData(data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());
909 
910         bool valid = true;
911         for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
912             const auto& subpassRef = subpassDescs[subpassIdx];
913 
914             RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
915             subpassResourceStates = {};
916 
917             valid = valid && ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
918             valid = valid && ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
919             valid = valid && ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
920             valid = valid && ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
921             valid = valid && ProcessFragmentShadingRateAttachments(renderPassDesc, subpassRef, subpassResourceStates);
922 #if (RENDER_VULKAN_FSR_ENABLED != 1)
923             data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
924 #endif
925         }
926         if (!valid) {
927             stateData_.validCommandList = false;
928         }
929 
930         // render pass layouts will be updated by render graph
931         renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
932     }
933 }
934 
BeginRenderPass(const RenderPassDesc & renderPassDesc,const uint32_t subpassStartIdx,const RenderPassSubpassDesc & subpassDesc)935 void RenderCommandList::BeginRenderPass(
936     const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
937 {
938 #if (RENDER_VALIDATION_ENABLED == 1)
939     if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
940         PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
941     }
942 #endif
943 
944     if (subpassStartIdx >= renderPassDesc.subpassCount) {
945         PLUGIN_LOG_E("RCL:BeginRenderPass: subpassStartIdx(%u) must be smaller than renderPassDesc.subpassCount (%u)",
946             subpassStartIdx, renderPassDesc.subpassCount);
947         stateData_.validCommandList = false;
948     }
949 
950     ValidateRenderPass(renderPassDesc);
951     if (!stateData_.validCommandList) {
952         return;
953     }
954 
955     stateData_.renderPassHasBegun = true;
956     stateData_.renderPassStartIndex = subpassStartIdx;
957     stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;
958 
959     if (renderPassDesc.attachmentCount > 0) {
960 #if (RENDER_VALIDATION_ENABLED == 1)
961         ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, { &subpassDesc, 1u });
962 #endif
963         AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);
964 
965         if (hasMultiRpCommandListSubpasses_) {
966             PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
967             stateData_.validCommandList = false;
968         } else if (renderPassDesc.subpassCount > 1) {
969             hasMultiRpCommandListSubpasses_ = true;
970             multiRpCommandListData_.secondaryCmdLists =
971                 (renderPassDesc.subpassContents == CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS);
972             if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BARRIER_POINT)) {
973                 multiRpCommandListData_.rpBarrierCmdIndex = static_cast<uint32_t>(renderCommands_.size()) - 1u;
974             }
975         }
976         multiRpCommandListData_.subpassCount = renderPassDesc.subpassCount;
977         multiRpCommandListData_.rpBeginCmdIndex = static_cast<uint32_t>(renderCommands_.size());
978 
979         if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
980             // NOTE: hashed in the backend
981             data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
982             data->renderPassDesc = renderPassDesc;
983             data->subpassStartIndex = subpassStartIdx;
984             // if false -> initial layout is undefined
985             data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;
986 
987             data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
988                 renderPassDesc.subpassCount };
989             data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
990                                                 allocator_, renderPassDesc.subpassCount),
991                 renderPassDesc.subpassCount };
992             if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
993                 return;
994             }
995 
996             bool valid = true;
997             for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
998                 RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
999                 subpassResourceStates = {};
1000                 data->subpasses[subpassIdx] = {};
1001 
1002                 if (subpassIdx == subpassStartIdx) {
1003                     data->subpasses[subpassIdx] = subpassDesc;
1004                     valid = valid && ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
1005                     valid = valid && ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
1006                     valid = valid && ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
1007                     valid = valid && ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
1008                     valid = valid &&
1009                             ProcessFragmentShadingRateAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
1010 #if (RENDER_VULKAN_FSR_ENABLED != 1)
1011                     data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
1012 #endif
1013                 }
1014             }
1015             if (!valid) {
1016                 stateData_.validCommandList = false;
1017             }
1018 
1019             // render pass layouts will be updated by render graph
1020             renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
1021         }
1022     }
1023 }
1024 
ProcessInputAttachments(const RenderPassDesc & renderPassDsc,const RenderPassSubpassDesc & subpassRef,RenderPassAttachmentResourceStates & subpassResourceStates)1025 bool RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
1026     const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
1027 {
1028     bool valid = true;
1029     for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
1030         const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];
1031         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
1032         if (!RenderHandleUtil::IsGpuImage(handle)) {
1033             valid = false;
1034         }
1035 
1036         // NOTE: mipLevel and layers are not updated to GpuResourceState
1037         // NOTE: validation needed for invalid handles
1038         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
1039         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
1040         refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
1041         refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
1042         refState.gpuQueue = gpuQueue_;
1043         // if used e.g. as input and color attachment use general layout
1044         if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
1045             subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
1046         } else {
1047             subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
1048                                                                  ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
1049                                                                  : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1050         }
1051 #if (RENDER_VALIDATION_ENABLED == 1)
1052         ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
1053             ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
1054 #endif
1055     }
1056     return valid;
1057 }
1058 
bool RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        subpassResourceStates.layouts[attachmentIndex] =
            (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
                ? CORE_IMAGE_LAYOUT_GENERAL
                : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.depthAttachmentCount == 1) {
        const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |=
            (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |=
            (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
        const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessFragmentShadingRateAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.fragmentShadingRateAttachmentCount == 1) {
#if (RENDER_VULKAN_FSR_ENABLED == 1)
        const uint32_t attachmentIndex = subpassRef.fragmentShadingRateAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL;
#else
        PLUGIN_LOG_ONCE_I("vk_fsr_disabled_flag",
            "RENDER_VALIDATION: Fragment shading rate disabled and all related attachments ignored.");
#endif
    }
    return valid;
}

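// A hedged illustration of the layout resolution above (not part of this
// translation unit): when the same attachment index is listed both as a color
// and as an input attachment of a subpass, its layout resolves to
// CORE_IMAGE_LAYOUT_GENERAL; the image handle below is hypothetical:
//   RenderPassDesc rpDesc {};
//   rpDesc.attachmentCount = 1u;
//   rpDesc.attachmentHandles[0u] = colorImageHandle;
//   RenderPassSubpassDesc subpass {};
//   subpass.colorAttachmentCount = 1u;
//   subpass.colorAttachmentIndices[0u] = 0u;
//   subpass.inputAttachmentCount = 1u;
//   subpass.inputAttachmentIndices[0u] = 0u; // read and written -> GENERAL layout
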
void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
{
    auto* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
    if (data) {
        data->subpassContents = subpassContents;
        data->renderCommandListIndex = 0; // will be updated in the render graph

        renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
    }
}

void RenderCommandList::EndRenderPass()
{
    if (!stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_EndRenderPass_",
            "RenderCommandList: a render pass must be begun before calling EndRenderPass");
#endif
        stateData_.validCommandList = false;
        return;
    }

    if (hasMultiRpCommandListSubpasses_ && (multiRpCommandListData_.rpBeginCmdIndex != INVALID_CL_IDX)) {
        multiRpCommandListData_.rpEndCmdIndex = static_cast<uint32_t>(renderCommands_.size());
    }

    auto* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
    if (data) {
        // will be updated in render graph if multi render command list render pass
        data->endType = RenderPassEndType::END_RENDER_PASS;
        data->subpassStartIndex = stateData_.renderPassStartIndex;
        data->subpassCount = stateData_.renderPassSubpassCount;

        renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
    }

    stateData_.renderPassHasBegun = false;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = 0;
}

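// A hedged usage sketch of subpass advancement and ending (illustrative only;
// assumes a two-subpass render pass was begun earlier on 'cmdList', and that
// CORE_SUBPASS_CONTENTS_INLINE is the inline-contents enumerator):
//   // ... record subpass 0 draws ...
//   cmdList.NextSubpass(CORE_SUBPASS_CONTENTS_INLINE);
//   // ... record subpass 1 draws ...
//   cmdList.EndRenderPass();
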
void RenderCommandList::BeginDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);

    // barrier point for pending barriers
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = false;
}

void RenderCommandList::EndDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);

    stateData_.automaticBarriersEnabled = true;
}

void RenderCommandList::AddCustomBarrierPoint()
{
    const bool barrierState = stateData_.automaticBarriersEnabled;
    stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = barrierState;
}

void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
#endif

    CommandBarrier cb {
        RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
        {
            source.accessFlags,
            source.pipelineStageFlags,
        },
        {},
        {
            destination.accessFlags,
            destination.pipelineStageFlags,
        },
        {},
    };

    customBarriers_.push_back(move(cb));

    stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
}

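// A hedged usage sketch (illustrative): suspend automatic barrier points and
// record an explicit global memory barrier instead; member order of
// GeneralBarrier is assumed to be { accessFlags, pipelineStageFlags }:
//   cmdList.BeginDisableAutomaticBarrierPoints();
//   cmdList.CustomMemoryBarrier(
//       GeneralBarrier { CORE_ACCESS_SHADER_WRITE_BIT, CORE_PIPELINE_STAGE_COMPUTE_SHADER_BIT },
//       GeneralBarrier { CORE_ACCESS_SHADER_READ_BIT, CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT });
//   cmdList.AddCustomBarrierPoint(); // records a barrier point for the custom barrier
//   cmdList.EndDisableAutomaticBarrierPoints();
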
void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
    const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (byteSize == 0) {
        PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
            "RENDER_VALIDATION: do not create zero size custom buffer barriers");
    }
    if (handleType != RenderHandleType::GPU_BUFFER) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
    }
#endif

    if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalByteOffset = byteOffset;
        src.optionalByteSize = byteSize;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalByteOffset = byteOffset;
        dst.optionalByteSize = byteSize;

        CommandBarrier cb {
            handle,
            src,
            {},
            dst,
            {},
        };

        customBarriers_.push_back(move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

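// A hedged usage sketch (illustrative): make transfer writes to a 1 KiB range
// of 'bufferHandle' (hypothetical) visible to vertex-shader reads; member
// order of BufferResourceBarrier is assumed to be { accessFlags, pipelineStageFlags }:
//   const BufferResourceBarrier src { CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT };
//   const BufferResourceBarrier dst { CORE_ACCESS_SHADER_READ_BIT, CORE_PIPELINE_STAGE_VERTEX_SHADER_BIT };
//   cmdList.CustomBufferBarrier(bufferHandle, src, dst, 0u, 1024u);
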
void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
    const ImageSubresourceRange& imageSubresourceRange)
{
    // source layout CORE_IMAGE_LAYOUT_MAX_ENUM signals that the current image state is fetched automatically
    ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
    CustomImageBarrier(handle, source, destination, imageSubresourceRange);
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
    const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (handleType != RenderHandleType::GPU_IMAGE) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
    }
    ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
#endif

    if (handleType == RenderHandleType::GPU_IMAGE) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalImageLayout = source.imageLayout;
        src.optionalImageSubresourceRange = imageSubresourceRange;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalImageLayout = destination.imageLayout;
        dst.optionalImageSubresourceRange = imageSubresourceRange;

        CommandBarrier cb {
            handle,
            src,
            {},
            dst,
            {},
        };

        customBarriers_.push_back(cb);

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

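// A hedged usage sketch (illustrative): transition 'imageHandle' (hypothetical)
// to shader-read-only layout with the single-barrier overload above, which
// fetches the source state automatically; the default-constructed range is
// assumed to be filled to cover the image in real code:
//   const ImageResourceBarrier dst { CORE_ACCESS_SHADER_READ_BIT,
//       CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
//   cmdList.CustomImageBarrier(imageHandle, dst, ImageSubresourceRange {});
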
void RenderCommandList::CopyBufferToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferCopy = bufferCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
    }
}

void RenderCommandList::CopyBufferToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
    }
}

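// A hedged usage sketch (illustrative): stage texel data from a CPU-visible
// buffer into an image; the BufferImageCopy fields are left for the caller:
//   BufferImageCopy copy {}; // fill buffer offset and image subresource/extent in real code
//   cmdList.CopyBufferToImage(stagingBufferHandle, textureHandle, copy);
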
void RenderCommandList::CopyImageToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
    }
}

void RenderCommandList::CopyImageToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->imageCopy = imageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
    }
}

void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
    const ImageBlit& imageBlit, const Filter filter)
{
    if (!stateData_.renderPassHasBegun) {
        if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
            if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
                RenderHandleUtil::IsDynamicResource(destinationHandle)) {
                AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
            }

            auto* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
            if (data) {
                data->srcHandle = sourceHandle;
                data->dstHandle = destinationHandle;
                data->imageBlit = imageBlit;
                data->filter = filter;
                // NOTE: desired layouts (barrier point needs to respect these)
                data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
                data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

                renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
            }
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of a render pass");
    }
}

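// A hedged usage sketch (illustrative): blit between two images with linear
// filtering; 'CORE_FILTER_LINEAR' is the assumed enumerator name and the blit
// regions are left for the caller:
//   ImageBlit blit {}; // fill src/dst subresources and offsets in real code
//   cmdList.BlitImage(srcImageHandle, dstImageHandle, blit, CORE_FILTER_LINEAR);
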
void RenderCommandList::UpdateDescriptorSets(const BASE_NS::array_view<const RenderHandle> handles,
    const BASE_NS::array_view<const DescriptorSetLayoutBindingResources> bindingResources)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (handles.size() != bindingResources.size()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: UpdateDescriptorSets handles and bindingResources sizes do not match");
    }
#endif
    const uint32_t count = static_cast<uint32_t>(Math::min(handles.size(), bindingResources.size()));
    for (uint32_t idx = 0; idx < count; ++idx) {
        const auto& handleRef = handles[idx];
        const auto& bindingResRef = bindingResources[idx];
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateDescriptorTypeBinding(nodeName_, gpuResourceMgr_, bindingResRef);
#endif
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handleRef);
        const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(handleRef);
#if (RENDER_VALIDATION_ENABLED == 1)
        if (bindingResRef.bindingMask != bindingResRef.descriptorSetBindingMask) {
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSets_bm_",
                "RENDER_VALIDATION: invalid bindings in descriptor set update (node:%s)", nodeName_.c_str());
        }
        if (handleType != RenderHandleType::DESCRIPTOR_SET) {
            PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
        }
#endif
        if (handleType == RenderHandleType::DESCRIPTOR_SET) {
            const DescriptorSetUpdateInfoFlags updateFlags =
                nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handleRef, bindingResRef, gpuQueue_);
            if ((updateFlags == DescriptorSetUpdateInfoFlagBits::DESCRIPTOR_SET_UPDATE_INFO_NEW_BIT) &&
                ((additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) == 0U)) {
                descriptorSetHandlesForUpdates_.push_back(handleRef);
            } else if (updateFlags & DescriptorSetUpdateInfoFlagBits::DESCRIPTOR_SET_UPDATE_INFO_INVALID_BIT) {
#if (RENDER_VALIDATION_ENABLED == 1)
                PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSet_invalid_",
                    "RenderCommandList: invalid descriptor set bindings with update (node:%s)", nodeName_.c_str());
#endif
            }
        }
    }
}

void RenderCommandList::UpdateDescriptorSet(
    const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
{
    UpdateDescriptorSets({ &handle, 1U }, { &bindingResources, 1U });
}

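// A hedged usage sketch (illustrative): write bindings to a descriptor set on
// the CPU and bind it for subsequent draws; 'setHandle' and 'bindings' are
// hypothetical values obtained from the node's descriptor set manager:
//   cmdList.UpdateDescriptorSet(setHandle, bindings);
//   cmdList.BindDescriptorSet(0u, setHandle);
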
void RenderCommandList::BindDescriptorSets(
    const uint32_t firstSet, const BASE_NS::array_view<const BindDescriptorSetData> descriptorSetData)
{
    if (descriptorSetData.empty()) {
        return;
    }
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(descriptorSetData.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E(
            "RenderCommandList::BindDescriptorSets: firstSet + descriptorSetData.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (descriptorSetData.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
    for (const auto& ref : descriptorSetData) {
        if (ref.dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
        }
    }
#endif

    RenderCommandBindDescriptorSets* data = nullptr;
    uint32_t descriptorSetCounterForBarriers = 0;
    uint32_t currSet = firstSet;

    // combine descriptor set bindings
    if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BIND_DESCRIPTOR_SETS)) {
        if (auto* prevCmd = static_cast<RenderCommandBindDescriptorSets*>(renderCommands_.back().rc); prevCmd) {
            if ((prevCmd->firstSet + prevCmd->setCount) == firstSet) {
                // add sets
                prevCmd->setCount += static_cast<uint32_t>(descriptorSetData.size());
                prevCmd->setCount = Math::min(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT, prevCmd->setCount);
                data = prevCmd;
            }
        }
    }

    // new allocation
    bool newAllocation = false;
    if (!data) {
        if (data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
            newAllocation = true;

            *data = {}; // default

            data->psoHandle = stateData_.currentPsoHandle;
            data->firstSet = firstSet;
            data->setCount = Math::min(
                PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT, static_cast<uint32_t>(descriptorSetData.size()));
        }
    }

    if (data) {
        for (const auto& ref : descriptorSetData) {
            if (currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
                const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(ref.handle);
                // flag global usage both for the list and for this descriptor set
                bool globalDescSet = false;
                if ((additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) != 0U) {
                    hasGlobalDescriptorSetBindings_ = true;
                    globalDescSet = true;
                }
                // allocate offsets for this set
                if (!ref.dynamicOffsets.empty()) {
                    const auto dynCount = static_cast<uint32_t>(ref.dynamicOffsets.size());
                    if (auto* doData = AllocateRenderData<uint32_t>(allocator_, dynCount); doData) {
                        auto& dynRef = data->descriptorSetDynamicOffsets[currSet];
                        dynRef.dynamicOffsets = doData;
                        dynRef.dynamicOffsetCount = dynCount;
                        CloneData(dynRef.dynamicOffsets, dynCount * sizeof(uint32_t), ref.dynamicOffsets.data(),
                            ref.dynamicOffsets.size_bytes());
                    }
                }

                data->descriptorSetHandles[currSet] = ref.handle;

                // NOTE: for global descriptor sets we do not know yet if they have dynamic resources
                // The set might be updated from a random render node task / thread
                const bool hasDynamicBarrierResources =
                    (globalDescSet) || nodeContextDescriptorSetManager_.HasDynamicBarrierResources(ref.handle);
                if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(ref.handle);
                    descriptorSetCounterForBarriers++;
                }
                stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
                stateData_.currentBoundSets[currSet].descriptorSetHandle = ref.handle;
                stateData_.currentBoundSetsMask |= (1 << currSet);
                ++currSet;
            }
        }

        if (newAllocation) {
            renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });
        }
        // if the currentBarrierPoint is null there have been invalid bindings earlier
        if (stateData_.renderPassHasBegun && stateData_.currentBarrierPoint) {
            // add possible barriers before render pass
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const BindDescriptorSetData& descriptorSetData)
{
    BindDescriptorSets(set, { &descriptorSetData, 1U });
}

void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
{
    BindDescriptorSetData bdsd[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
    const uint32_t count =
        Math::min(static_cast<uint32_t>(handles.size()), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
    for (uint32_t idx = 0U; idx < count; ++idx) {
        bdsd[idx].handle = handles[idx];
    }
    BindDescriptorSets(firstSet, { bdsd, count });
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
{
    BindDescriptorSetData bdsd = { handle, {} };
    BindDescriptorSets(set, { &bdsd, 1U });
}

void RenderCommandList::BindDescriptorSet(
    const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
{
    BindDescriptorSetData bdsd = { handle, dynamicOffsets };
    BindDescriptorSets(set, { &bdsd, 1U });
}

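// A hedged usage sketch of binding with dynamic offsets (illustrative; the
// offset value and 'setHandle' are hypothetical):
//   const uint32_t dynamicOffsets[] = { 256u }; // byte offset into a dynamic buffer binding
//   cmdList.BindDescriptorSet(1u, setHandle, dynamicOffsets);
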
void RenderCommandList::BuildAccelerationStructures(const AccelerationStructureBuildGeometryData& geometry,
    const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,
    const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,
    const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)
{
    if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        RenderCommandBuildAccelerationStructure* data =
            AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
        if (!data) {
            return; // early out
        }
        data->type = geometry.info.type;
        data->flags = geometry.info.flags;
        data->mode = geometry.info.mode;
        data->srcAccelerationStructure = geometry.srcAccelerationStructure;
        data->dstAccelerationStructure = geometry.dstAccelerationStructure;
        data->scratchBuffer = geometry.scratchBuffer.handle;
        data->scratchOffset = geometry.scratchBuffer.offset;

        if (!triangles.empty()) {
            auto* trianglesData = static_cast<AccelerationStructureGeometryTrianglesData*>(
                AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryTrianglesData>(),
                    sizeof(AccelerationStructureGeometryTrianglesData) * triangles.size()));
            if (trianglesData) { // guard against allocation failure
                data->trianglesData = trianglesData;
                data->trianglesView = { data->trianglesData, triangles.size() };
                for (size_t idx = 0; idx < triangles.size(); ++idx) {
                    data->trianglesView[idx] = triangles[idx];
                }
            }
        }
        if (!aabbs.empty()) {
            auto* aabbsData = static_cast<AccelerationStructureGeometryAabbsData*>(
                AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryAabbsData>(),
                    sizeof(AccelerationStructureGeometryAabbsData) * aabbs.size()));
            if (aabbsData) { // guard against allocation failure
                data->aabbsData = aabbsData;
                data->aabbsView = { data->aabbsData, aabbs.size() };
                for (size_t idx = 0; idx < aabbs.size(); ++idx) {
                    data->aabbsView[idx] = aabbs[idx];
                }
            }
        }
        if (!instances.empty()) {
            auto* instancesData = static_cast<AccelerationStructureGeometryInstancesData*>(
                AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryInstancesData>(),
                    sizeof(AccelerationStructureGeometryInstancesData) * instances.size()));
            if (instancesData) { // guard against allocation failure
                data->instancesData = instancesData;
                data->instancesView = { data->instancesData, instances.size() };
                for (size_t idx = 0; idx < instances.size(); ++idx) {
                    data->instancesView[idx] = instances[idx];
                }
            }
        }
        renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
#endif
    }
}

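// A hedged usage sketch (illustrative): build a BLAS from one triangle
// geometry; all handles are hypothetical and scratchBuffer is assumed to be a
// { handle, offset } pair as used above:
//   AccelerationStructureBuildGeometryData geometry {};
//   geometry.dstAccelerationStructure = blasHandle;
//   geometry.scratchBuffer = { scratchBufferHandle, 0u };
//   AccelerationStructureGeometryTrianglesData tris {}; // vertex/index data set by the caller
//   cmdList.BuildAccelerationStructures(geometry, { &tris, 1u }, {}, {});
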
void RenderCommandList::ClearColorImage(
    const RenderHandle handle, const ClearColorValue color, const array_view<const ImageSubresourceRange> ranges)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!RenderHandleUtil::IsGpuImage(handle)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: Invalid image handle given to ClearColorImage");
    }
    if (ranges.empty()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: Invalid ranges given to ClearColorImage");
    }
    const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(handle);
    if ((desc.usageFlags & CORE_IMAGE_USAGE_TRANSFER_DST_BIT) == 0) {
        PLUGIN_LOG_E("RENDER_VALIDATION: Image missing usage flag TRANSFER_DST for ClearColorImage command");
    }
#endif
    if (RenderHandleUtil::IsGpuImage(handle) && (!ranges.empty())) {
        AddBarrierPoint(RenderCommandType::CLEAR_COLOR_IMAGE);

        auto* data = AllocateRenderCommand<RenderCommandClearColorImage>(allocator_);
        if (data) {
            data->handle = handle;
            data->imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            data->color = color;
            data->ranges = { AllocateRenderData<ImageSubresourceRange>(
                                 allocator_, static_cast<uint32_t>(ranges.size())),
                ranges.size() };
            if (!data->ranges.data()) {
                return;
            }
            CloneData(data->ranges.data(), data->ranges.size_bytes(), ranges.data(), ranges.size_bytes());

            renderCommands_.push_back({ RenderCommandType::CLEAR_COLOR_IMAGE, data });
        }
    }
}

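// A hedged usage sketch (illustrative): clear a whole color image; the
// default-constructed range is assumed to be filled to cover the image in real code:
//   ImageSubresourceRange range {}; // aspect/mip/layer fields set by the caller
//   const ClearColorValue clearColor {};
//   cmdList.ClearColorImage(imageHandle, clearColor, { &range, 1u });
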
void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateViewport(nodeName_, viewportDesc);
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
    if (data) {
        data->viewportDesc = viewportDesc;
        data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
        data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
    }
}

void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateScissor(nodeName_, scissorDesc);
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
    if (data) {
        data->scissorDesc = scissorDesc;
        data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
        data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
    }
}

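// A hedged usage sketch (illustrative): set a full-resolution viewport and
// scissor; the width/height and extent members match those clamped above,
// other fields are left at their defaults:
//   ViewportDesc vp {};
//   vp.width = 1280.0f;
//   vp.height = 720.0f;
//   cmdList.SetDynamicStateViewport(vp);
//   ScissorDesc sc {};
//   sc.extentWidth = 1280u;
//   sc.extentHeight = 720u;
//   cmdList.SetDynamicStateScissor(sc);
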
void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
    if (data) {
        data->lineWidth = lineWidth;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBias(
    const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
    if (data) {
        data->depthBiasConstantFactor = depthBiasConstantFactor;
        data->depthBiasClamp = depthBiasClamp;
        data->depthBiasSlopeFactor = depthBiasSlopeFactor;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
    }
}

void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
{
    constexpr uint32_t MAX_BLEND_CONSTANT_COUNT = 4;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (blendConstants.size() > MAX_BLEND_CONSTANT_COUNT) {
        PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
            MAX_BLEND_CONSTANT_COUNT);
    }
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
    if (data) {
        *data = {};
        const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), MAX_BLEND_CONSTANT_COUNT);
        for (uint32_t idx = 0; idx < bcCount; ++idx) {
            data->blendConstants[idx] = blendConstants[idx];
        }
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
    if (data) {
        data->minDepthBounds = minDepthBounds;
        data->maxDepthBounds = maxDepthBounds;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
    }
}

void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::COMPARE_MASK;
        data->faceMask = faceMask;
        data->mask = compareMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::WRITE_MASK;
        data->faceMask = faceMask;
        data->mask = writeMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::REFERENCE;
        data->faceMask = faceMask;
        data->mask = reference;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateFragmentShadingRate(
    const Size2D& fragmentSize, const FragmentShadingRateCombinerOps& combinerOps)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateFragmentShadingRate>(allocator_);
    if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateFragmentShadingRate(fragmentSize);
#endif
        // valid input values for the fragment size are 0-4; they are mapped to the supported 1/2/4 sizes
        constexpr uint32_t maxValue { 4u };
        constexpr uint32_t valueMapper[maxValue + 1u] = { 1u, 1u, 2u, 2u, 4u };
        Size2D fs = fragmentSize;
        fs.width = (fs.width <= maxValue) ? valueMapper[fs.width] : maxValue;
        fs.height = (fs.height <= maxValue) ? valueMapper[fs.height] : maxValue;

        data->fragmentSize = fs;
        data->combinerOps = combinerOps;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE, data });
    }
}

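// A hedged usage sketch (illustrative): request a 2x2 fragment shading rate;
// default-constructed combiner ops are assumed to keep the base rate:
//   cmdList.SetDynamicStateFragmentShadingRate(Size2D { 2u, 2u }, FragmentShadingRateCombinerOps {});
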
void RenderCommandList::SetExecuteBackendFramePosition()
{
    if (!stateData_.executeBackendFrameSet) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        auto* data = AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
            stateData_.executeBackendFrameSet = true;
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: there can be only one SetExecuteBackendFramePosition() call per frame");
    }
}

void RenderCommandList::BeginDebugMarker(const BASE_NS::string_view name, const BASE_NS::Math::Vec4 color)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (!name.empty()) {
        RenderCommandBeginDebugMarker* data = AllocateRenderCommand<RenderCommandBeginDebugMarker>(allocator_);
        if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
            if (name.size() > RenderCommandBeginDebugMarker::SIZE_OF_NAME) {
                PLUGIN_LOG_W("RENDER_VALIDATION: Debug marker name longer than (%u)",
                    RenderCommandBeginDebugMarker::SIZE_OF_NAME);
            }
#endif
            data->name = name;
            data->color = { color };
            renderCommands_.push_back({ RenderCommandType::BEGIN_DEBUG_MARKER, data });
            debugMarkerStack_.stackCounter++;
        }
    }
#endif
}

void RenderCommandList::BeginDebugMarker(const BASE_NS::string_view name)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    BeginDebugMarker(name, { 1.0f, 1.0f, 1.0f, 1.0f });
#endif
}

void RenderCommandList::EndDebugMarker()
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (debugMarkerStack_.stackCounter > 0U) {
        RenderCommandEndDebugMarker* data = AllocateRenderCommand<RenderCommandEndDebugMarker>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::END_DEBUG_MARKER, data });
            debugMarkerStack_.stackCounter--;
        }
    }
#endif
}

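// A hedged usage sketch (illustrative): markers must be balanced, and
// EndDebugMarker() is a no-op when the marker stack is empty:
//   cmdList.BeginDebugMarker("ShadowPass");
//   // ... record commands ...
//   cmdList.EndDebugMarker();
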
void RenderCommandList::ValidateRenderPass(const RenderPassDesc& renderPassDesc)
{
    if (stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_hasbegun_",
            "RenderCommandList: render pass is active; it must be ended before starting a new one (node: %s)",
            nodeName_.c_str());
#endif
        stateData_.validCommandList = false;
    }
    // validate render pass attachments
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        if (!RenderHandleUtil::IsValid(renderPassDesc.attachmentHandles[idx])) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_attachments_",
                "RenderCommandList: Invalid render pass attachment handle in index: %u (node:%s)", idx,
                nodeName_.c_str());
#endif
            stateData_.validCommandList = false;
        }
    }
}

void RenderCommandList::ValidatePipeline()
{
    if (!stateData_.validPso) {
        stateData_.validCommandList = false;
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidatePipeline_", "RenderCommandList: PSO not bound.");
#endif
    }
}

void RenderCommandList::ValidatePipelineLayout()
{
    if (stateData_.checkBindPipelineLayout) {
        stateData_.checkBindPipelineLayout = false;
        // fast check without validation
        const uint32_t pipelineLayoutSetsMask =
            RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
        if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderCommandList::ValidatePipelineLayout", "RenderCommandList: not all needed descriptor sets bound");
#endif
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
        const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
                                       ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
                                       : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
        uint32_t plDescriptorSetCount = 0U;
        uint32_t bindCount = 0U;
        uint32_t bindSetIndices[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT] { ~0u, ~0u, ~0u, ~0u };
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
            if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
                bindCount++;
                bindSetIndices[idx] = idx;
            }
            if (pl.descriptorSetLayouts[idx].set != PipelineLayoutConstants::INVALID_INDEX) {
                plDescriptorSetCount++;
            }
        }
        if (bindCount < plDescriptorSetCount) {
            const auto debugName = nodeName_ + "not_all_pl_bound";
            PLUGIN_LOG_ONCE_E(debugName,
                "RENDER_VALIDATION: not all descriptor sets required by the pipeline layout are bound");
        }
#endif
    }
}

const CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid) const
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid)
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

void RenderCommandList::Ref() {}

void RenderCommandList::Unref() {}
RENDER_END_NAMESPACE()