• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "render_command_list.h"
17 
18 #include <cinttypes>
19 #include <cstdint>
20 
21 #include <base/containers/array_view.h>
22 #include <render/device/pipeline_layout_desc.h>
23 #include <render/namespace.h>
24 #include <render/nodecontext/intf_render_command_list.h>
25 #include <render/render_data_structures.h>
26 
27 #include "device/gpu_resource_handle_util.h"
28 #include "device/gpu_resource_manager.h"
29 #include "nodecontext/node_context_descriptor_set_manager.h"
30 #include "nodecontext/node_context_pso_manager.h"
31 #include "nodecontext/render_node_context_manager.h"
32 #include "util/linear_allocator.h"
33 #include "util/log.h"
34 
35 using namespace BASE_NS;
36 
37 RENDER_BEGIN_NAMESPACE()
38 namespace {
39 #if (RENDER_VALIDATION_ENABLED == 1)
ValidateImageUsageFlags(const GpuResourceManager & gpuResourceMgr,const RenderHandle handl,const ImageUsageFlags imageUsageFlags,const string_view str)40 void ValidateImageUsageFlags(const GpuResourceManager& gpuResourceMgr, const RenderHandle handl,
41     const ImageUsageFlags imageUsageFlags, const string_view str)
42 {
43     if ((gpuResourceMgr.GetImageDescriptor(handl).usageFlags & imageUsageFlags) == 0) {
44         PLUGIN_LOG_E("RENDER_VALIDATION: gpu image (handle: %" PRIu64
45                      ") (name: %s), not created with needed flags: %s ",
46             handl.id, gpuResourceMgr.GetName(handl).c_str(), str.data());
47     }
48 }
49 
ValidateBufferUsageFlags(const GpuResourceManager & gpuResourceMgr,const RenderHandle handl,const BufferUsageFlags bufferUsageFlags,const string_view str)50 void ValidateBufferUsageFlags(const GpuResourceManager& gpuResourceMgr, const RenderHandle handl,
51     const BufferUsageFlags bufferUsageFlags, const string_view str)
52 {
53     if ((gpuResourceMgr.GetBufferDescriptor(handl).usageFlags & bufferUsageFlags) == 0) {
54         PLUGIN_LOG_E("RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
55                      ") (name: %s), not created with needed flags: %s",
56             handl.id, gpuResourceMgr.GetName(handl).c_str(), str.data());
57     }
58 }
59 
ValidateDescriptorTypeBinding(const GpuResourceManager & gpuResourceMgr,const DescriptorSetLayoutBindingResources & bindingRes)60 void ValidateDescriptorTypeBinding(
61     const GpuResourceManager& gpuResourceMgr, const DescriptorSetLayoutBindingResources& bindingRes)
62 {
63     for (const auto& ref : bindingRes.buffers) {
64         if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
65             ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
66                 "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
67         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
68             ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
69                 "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
70         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
71             ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
72                 "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
73         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
74             ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT,
75                 "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
76         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
77             ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
78                 "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
79         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
80             ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT,
81                 "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
82         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
83         } else {
84             PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
85         }
86     }
87     for (const auto& ref : bindingRes.images) {
88         if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
89             (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
90             ValidateImageUsageFlags(
91                 gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT, "CORE_IMAGE_USAGE_SAMPLED_BIT");
92         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
93             ValidateImageUsageFlags(
94                 gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT, "CORE_IMAGE_USAGE_STORAGE_BIT");
95         } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
96             ValidateImageUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
97                 "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
98         } else {
99             PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
100         }
101     }
102     for (const auto& ref : bindingRes.samplers) {
103         if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
104         } else {
105             PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
106         }
107     }
108 }
109 
ValidateRenderPassAttachment(const GpuResourceManager & gpuResourceMgr,const RenderPassDesc & renderPassDsc)110 void ValidateRenderPassAttachment(const GpuResourceManager& gpuResourceMgr, const RenderPassDesc& renderPassDsc)
111 {
112     const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDsc.attachmentHandles[0]);
113     const uint32_t baseWidth = baseDesc.width;
114     const uint32_t baseHeight = baseDesc.height;
115     for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDsc.attachmentCount; ++attachmentIdx) {
116         const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDsc.attachmentHandles[attachmentIdx]);
117         if (desc.width != baseWidth || desc.height != baseHeight) {
118             PLUGIN_LOG_E("RENDER_VALIDATION: render pass attachment size does not match with attachment index: %u",
119                 attachmentIdx);
120             PLUGIN_LOG_E("RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
121                 baseHeight, desc.width, desc.height);
122         }
123     }
124     if ((renderPassDsc.renderArea.extentWidth == 0) || (renderPassDsc.renderArea.extentHeight == 0)) {
125         PLUGIN_LOG_E("RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u)",
126             renderPassDsc.renderArea.extentWidth, renderPassDsc.renderArea.extentHeight);
127     }
128     if ((renderPassDsc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
129         (renderPassDsc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
130         PLUGIN_LOG_E(
131             "RENDER_VALIDATION: render area offset cannot go out of screen (offsetX: %i, offsetY: %i) (baseWidth: "
132             "%u, "
133             "baseHeight: %u)",
134             renderPassDsc.renderArea.offsetX, renderPassDsc.renderArea.offsetY, baseWidth, baseHeight);
135     }
136 }
137 
ValidateImageSubresourceRange(const GpuResourceManager & gpuResourceMgr,const RenderHandle handle,const ImageSubresourceRange & imageSubresourceRange)138 void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
139     const ImageSubresourceRange& imageSubresourceRange)
140 {
141     const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
142     if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
143         PLUGIN_LOG_E("RENDER_VALIDATION : ImageSubresourceRange mipLevel: %u, is greater or equal to mipCount: %u",
144             imageSubresourceRange.baseMipLevel, desc.mipCount);
145     }
146     if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
147         PLUGIN_LOG_E("RENDER_VALIDATION : ImageSubresourceRange layer: %u, is greater or equal to layerCount: %u",
148             imageSubresourceRange.baseArrayLayer, desc.layerCount);
149     }
150 }
151 
ValidateViewport(const ViewportDesc & vd)152 void ValidateViewport(const ViewportDesc& vd)
153 {
154     if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
155         PLUGIN_LOG_E(
156             "RENDER_VALIDATION : viewport width (%f) and height (%f) must be one or larger", vd.width, vd.height);
157     }
158 }
159 
ValidateScissor(const ScissorDesc & sd)160 void ValidateScissor(const ScissorDesc& sd)
161 {
162     if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
163         PLUGIN_LOG_E("RENDER_VALIDATION : scissor extentWidth (%u) and scissor extentHeight (%u) cannot be zero",
164             sd.extentWidth, sd.extentHeight);
165     }
166 }
167 #endif
168 
// Alignment of individual allocations handed out by the linear allocators.
constexpr size_t MEMORY_ALIGNMENT { 16 };
// Alignment used when rounding up allocator block byte sizes.
constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
// Divisor for the extra headroom reserved at frame begin (1/8 of last frame's usage).
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
// Smallest linear allocator block that will be created (2 KiB).
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barriers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };
176 
// Rounds byteSize up to the next multiple of alignment.
// Precondition: alignment must be a non-zero power of two (the bit trick below
// is only correct in that case); all call sites use the power-of-two constants above.
// Made constexpr so it can also be used in constant expressions.
constexpr size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}
181 
AllocateRenderData(RenderCommandList::LinearAllocatorStruct & allocator,const size_t byteSz)182 void* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, const size_t byteSz)
183 {
184     PLUGIN_ASSERT(byteSz > 0);
185     void* rc = nullptr;
186     if (!allocator.allocators.empty()) {
187         const size_t currentIndex = allocator.allocators.size() - 1;
188         rc = allocator.allocators[currentIndex]->Allocate(byteSz);
189     }
190 
191     if (rc == nullptr) { // current allocator is out of memory
192         size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
193         const size_t currentIndex = allocator.allocators.size();
194         if (currentIndex > 0) {
195             allocatorByteSize =
196                 Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
197         }
198         allocator.allocators.emplace_back(make_unique<LinearAllocator>(allocatorByteSize, MEMORY_ALIGNMENT));
199 
200         rc = allocator.allocators[currentIndex]->Allocate(byteSz);
201         if (rc == nullptr) {
202             PLUGIN_LOG_E("RenderCommandList: render command list allocation : out of memory");
203             PLUGIN_ASSERT(false);
204         }
205     }
206     return rc;
207 }
208 
// Typed wrapper: allocates raw storage for `count` objects of T from the
// linear allocator. NOTE: storage is uninitialized; callers assign all fields.
template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, sizeof(T) * count));
}
214 
// Allocates raw storage for a single render command struct of type T.
// NOTE: storage is uninitialized; callers assign all fields before use.
template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, sizeof(T)));
}
220 } // namespace
221 
// Binds this command list to its node context managers and GPU queue.
// The GPU resource and PSO managers are only stored when validation is
// enabled; they are used solely for the RENDER_VALIDATION checks.
RenderCommandList::RenderCommandList(NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr,
    const GpuResourceManager& gpuResourceMgr, const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue,
    const bool enableMultiQueue)
    : IRenderCommandList(),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}
232 
BeginFrame()233 void RenderCommandList::BeginFrame()
234 {
235     if (allocator_.allocators.size() == 1) { // size is good for this frame
236         allocator_.allocators[0]->Reset();
237     } else if (allocator_.allocators.size() > 1) {
238         size_t fullByteSize = 0;
239         size_t alignment = 0;
240         for (auto& ref : allocator_.allocators) {
241             fullByteSize += ref->GetCurrentByteSize();
242             alignment = Math::max(alignment, (size_t)ref->GetAlignment());
243             ref.reset();
244         }
245         allocator_.allocators.clear();
246 
247         // add some room for current frame allocation for new render commands
248         const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
249         fullByteSize += extraBytes;
250 
251         // create new single allocation for combined previous size and some extra bytes
252         const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
253         allocator_.allocators.emplace_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
254     }
255 
256     ResetStateData();
257 
258     const auto clearAndReserve = [](auto& vec) {
259         const size_t count = vec.size();
260         vec.clear();
261         vec.reserve(count);
262     };
263 
264     clearAndReserve(renderCommands_);
265     clearAndReserve(customBarriers_);
266     clearAndReserve(vertexInputBufferBarriers_);
267     clearAndReserve(descriptorSetHandlesForBarriers_);
268 
269     validReleaseAcquire_ = false;
270     hasMultiRenderCommandListSubpasses_ = false;
271     multiRendercommandListSubpassCount_ = 1;
272 }
273 
SetValidGpuQueueReleaseAcquireBarriers()274 void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
275 {
276     if (enableMultiQueue_) {
277         validReleaseAcquire_ = true;
278     }
279 }
280 
BeforeRenderNodeExecuteFrame()281 void RenderCommandList::BeforeRenderNodeExecuteFrame()
282 {
283     // add possible barrier point for gpu queue transfer acquire
284     if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
285         AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
286     }
287 }
288 
// Called after the render node has recorded its commands. With validation
// enabled it warns about unbalanced state (render pass left open, automatic
// barriers left disabled). With multi-queue enabled it flushes any pending
// custom barriers and appends the queue-transfer release barrier point.
void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarriers() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        // flush still-pending custom barriers before the queue release
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }
}
309 
GetRenderCommands() const310 array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
311 {
312 #if (RENDER_VALIDATION_ENABLED == 1)
313     if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
314         PLUGIN_LOG_E("RENDER_VALIDATION: invalid state data in render command list");
315     }
316 #endif
317 
318     return array_view<const RenderCommandWithType>(renderCommands_.data(), renderCommands_.size());
319 }
320 
HasValidRenderCommands() const321 bool RenderCommandList::HasValidRenderCommands() const
322 {
323     const uint32_t renderCommandCount = GetRenderCommandCount();
324     bool valid = false;
325     if (enableMultiQueue_) {
326         if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
327             // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
328             valid = validReleaseAcquire_;
329         } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
330             valid = true;
331         }
332     } else {
333         valid = (renderCommandCount > 0);
334     }
335     valid = valid && stateData_.validCommandList;
336 
337     return valid;
338 }
339 
GetRenderCommandCount() const340 uint32_t RenderCommandList::GetRenderCommandCount() const
341 {
342     return (uint32_t)renderCommands_.size();
343 }
344 
// Returns the GPU queue this command list records commands for.
GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}
349 
// True when this list's render pass subpasses span multiple command lists.
bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRenderCommandListSubpasses_;
}
354 
// Returns the subpass count for multi-command-list render passes (1 by default).
uint32_t RenderCommandList::GetMultiRenderCommandListSubpassCount() const
{
    return multiRendercommandListSubpassCount_;
}
359 
GetCustomBarriers() const360 array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
361 {
362     return array_view<const CommandBarrier>(customBarriers_.data(), customBarriers_.size());
363 }
364 
GetVertexInputBufferBarriers() const365 array_view<const VertexBuffer> RenderCommandList::GetVertexInputBufferBarriers() const
366 {
367     return array_view<const VertexBuffer>(vertexInputBufferBarriers_.data(), vertexInputBufferBarriers_.size());
368 }
369 
GetDescriptorSetHandles() const370 array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
371 {
372     return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
373 }
374 
// Records a barrier point command that the backend later resolves into actual
// GPU barriers. Collects, for this point: descriptor set handles with dynamic
// barrier resources, vertex/index buffer barrier indices, and any pending
// custom barriers. No-op while automatic barriers are disabled.
void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    RenderCommandBarrierPoint* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (data) {
        *data = {}; // zero initialize

        data->renderCommandType = renderCommandType;
        data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

        // update new index (within render pass there might not be any dirty descriptor sets at this stage)
        const uint32_t descriptorSetBeginIndex = (uint32_t)descriptorSetHandlesForBarriers_.size();
        data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
        data->descriptorSetHandleCount = 0;
        // update new index (only valid with render pass)
        data->vertexIndexBarrierIndexBegin = (uint32_t)vertexInputBufferBarriers_.size();
        data->vertexIndexBarrierCount = 0;

        // barriers are always needed e.g. when dynamic resource is bound for writing in multiple dispatches
        const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                          renderCommandType == RenderCommandType::DISPATCH ||
                                          renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
        if (handleDescriptorSets) {
            stateData_.dirtyDescriptorSetsForBarriers = false;
            for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
                // only add descriptor set handles for barriers if there are dynamic barrier resources
                if (stateData_.currentBoundSets[idx].hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.emplace_back(stateData_.currentBoundSets[idx].descriptorSetHandle);
                }
            }
            data->descriptorSetHandleCount =
                (uint32_t)descriptorSetHandlesForBarriers_.size() - descriptorSetBeginIndex;
        }

        // only consume the custom barriers appended since the previous barrier point
        const bool handleCustomBarriers =
            ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
        if (handleCustomBarriers) {
            const int32_t newCount = (int32_t)customBarriers_.size() - stateData_.currentCustomBarrierIndices.prevSize;
            if (newCount > 0) {
                data->customBarrierIndexBegin = (uint32_t)stateData_.currentCustomBarrierIndices.prevSize;
                data->customBarrierCount = (uint32_t)newCount;

                stateData_.currentCustomBarrierIndices.prevSize = (int32_t)customBarriers_.size();
                stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
            }
        }

        // store current barrier point for render command list
        // * binding descriptor sets (with dynamic barrier resources)
        // * binding vertex and index buffers (with dynamic barrier resources)
        // inside a render pass adds barriers directly to the RenderCommandBarrierPoint behind this pointer
        stateData_.currentBarrierPoint = data;

        renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
    }
}
434 
Draw(const uint32_t vertexCount,const uint32_t instanceCount,const uint32_t firstVertex,const uint32_t firstInstance)435 void RenderCommandList::Draw(
436     const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
437 {
438 #if (RENDER_VALIDATION_ENABLED == 1)
439     if (!stateData_.renderPassHasBegun) {
440         PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
441     }
442 #endif
443 
444     if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
445         ValidatePipeline();
446         ValidatePipelineLayout();
447 
448         RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
449         if (data) {
450             data->drawType = DrawType::DRAW;
451             data->vertexCount = vertexCount;
452             data->instanceCount = instanceCount;
453             data->firstVertex = firstVertex;
454             data->firstInstance = firstInstance;
455             data->indexCount = 0;
456             data->firstIndex = 0;
457             data->vertexOffset = 0;
458 
459             renderCommands_.push_back({ RenderCommandType::DRAW, data });
460         }
461     }
462 }
463 
DrawIndexed(const uint32_t indexCount,const uint32_t instanceCount,const uint32_t firstIndex,const int32_t vertexOffset,const uint32_t firstInstance)464 void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
465     const int32_t vertexOffset, const uint32_t firstInstance)
466 {
467 #if (RENDER_VALIDATION_ENABLED == 1)
468     if (!stateData_.renderPassHasBegun) {
469         PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
470     }
471 #endif
472 
473     if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
474         ValidatePipeline();
475         ValidatePipelineLayout();
476 
477         RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
478         if (data) {
479             data->drawType = DrawType::DRAW_INDEXED;
480             data->vertexCount = 0;
481             data->instanceCount = instanceCount;
482             data->firstVertex = 0;
483             data->firstInstance = firstInstance;
484             data->indexCount = indexCount;
485             data->firstIndex = firstIndex;
486             data->vertexOffset = vertexOffset;
487 
488             renderCommands_.push_back({ RenderCommandType::DRAW, data });
489         }
490     }
491 }
492 
DrawIndirect(const RenderHandle bufferHandle,const uint32_t offset,const uint32_t drawCount,const uint32_t stride)493 void RenderCommandList::DrawIndirect(
494     const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
495 {
496 #if (RENDER_VALIDATION_ENABLED == 1)
497     if (!stateData_.renderPassHasBegun) {
498         PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
499     }
500 #endif
501 
502     if (stateData_.renderPassHasBegun) {
503         ValidatePipeline();
504         ValidatePipelineLayout();
505 
506         RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
507         if (data) {
508             data->drawType = DrawType::DRAW_INDIRECT;
509             data->argsHandle = bufferHandle;
510             data->offset = offset;
511             data->drawCount = drawCount;
512             data->stride = stride;
513 
514             renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
515         }
516     }
517 }
518 
DrawIndexedIndirect(const RenderHandle bufferHandle,const uint32_t offset,const uint32_t drawCount,const uint32_t stride)519 void RenderCommandList::DrawIndexedIndirect(
520     const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
521 {
522 #if (RENDER_VALIDATION_ENABLED == 1)
523     if (!stateData_.renderPassHasBegun) {
524         PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
525     }
526 #endif
527 
528     if (stateData_.renderPassHasBegun) {
529         ValidatePipeline();
530         ValidatePipelineLayout();
531 
532         RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
533         if (data) {
534             data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
535             data->argsHandle = bufferHandle;
536             data->offset = offset;
537             data->drawCount = drawCount;
538             data->stride = stride;
539 
540             renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
541         }
542     }
543 }
544 
Dispatch(const uint32_t groupCountX,const uint32_t groupCountY,const uint32_t groupCountZ)545 void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
546 {
547     if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
548         ValidatePipeline();
549         ValidatePipelineLayout();
550 
551         AddBarrierPoint(RenderCommandType::DISPATCH);
552 
553         RenderCommandDispatch* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
554         if (data) {
555             data->groupCountX = groupCountX;
556             data->groupCountY = groupCountY;
557             data->groupCountZ = groupCountZ;
558 
559             renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
560         }
561     }
562 }
563 
DispatchIndirect(const RenderHandle bufferHandle,const uint32_t offset)564 void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
565 {
566     ValidatePipeline();
567     ValidatePipelineLayout();
568 
569     AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);
570 
571     RenderCommandDispatchIndirect* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
572     if (data) {
573         data->argsHandle = bufferHandle;
574         data->offset = offset;
575 
576         renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
577     }
578 }
579 
// Binds a graphics or compute PSO. Re-binding the already-bound PSO is a no-op.
// Determines the bind point from the handle type, validates the handle (and,
// with validation on, that graphics PSOs are bound inside a render pass),
// updates pipeline state tracking, and records the bind command.
void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    if (stateData_.currentPsoHandle.id == psoHandle.id) {
        return; // early out
    }

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    // the handle type decides whether this is a compute or graphics bind
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    // pipeline layout must be re-checked after a pipeline change
    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_E("RENDER_VALIDATION: bind pipeline after render pass begin");
        }
    }
#endif

    // record validity BEFORE ValidatePipeline() so it sees the new state
    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    RenderCommandBindPipeline* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}
622 
// Records a push constant update for the currently bound PSO. The constant
// data is copied into command-list-owned memory so the caller's buffer does
// not need to outlive this call. Updates larger than
// MAX_PUSH_CONSTANT_BYTE_SIZE are rejected (logged with validation enabled).
void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    ValidatePipeline();

    // push constant is not used/allocated if byte size is bigger than supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && data) {
        RenderCommandPushConstant* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        // separate allocation holds the copied constant data
        uint8_t* pushData = static_cast<uint8_t*>(AllocateRenderData(allocator_, pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            const bool res = CloneData(rc->data, pushConstant.byteSize, data, pushConstant.byteSize);
            PLUGIN_UNUSED(res);
            PLUGIN_ASSERT(res);

            renderCommands_.emplace_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be smaller or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}
649 
// Binds up to MAX_VERTEX_BUFFER_COUNT vertex buffers; extra entries are silently dropped
// (warned under validation). Dynamic-resource buffers are additionally queued for
// vertex-input barriers when a render pass has already begun.
void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION : max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (!vertexBuffers.empty()) {
        RenderCommandBindVertexBuffers* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
        if (data) {
            // collected handles of dynamic buffers that need a barrier before the pass
            VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
            uint32_t dynamicBarrierVertexBufferCount = 0;
            const uint32_t vertexBufferCount =
                Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, (uint32_t)vertexBuffers.size());
            data->vertexBufferCount = vertexBufferCount;
            RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
            for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
                data->vertexBuffers[idx] = vertexBuffers[idx];
                const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
                // skip duplicates of the immediately preceding handle; only dynamic, non-empty
                // bindings need a barrier
                if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
                    (vertexBuffers[idx].byteSize > 0)) {
                    // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
                    dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
                        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
                    previousVbHandle = currVbHandle;
                }
            }

            // add possible vertex/index buffer barriers before render pass
            if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
                const size_t currCount = vertexInputBufferBarriers_.size();
                vertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
                for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
                    vertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
                }
            }

            renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
        }
    }
}
696 
BindIndexBuffer(const IndexBuffer & indexBuffer)697 void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
698 {
699     ValidatePipeline();
700 
701     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
702 #if (RENDER_VALIDATION_ENABLED == 1)
703     if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
704         PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
705     }
706 #endif
707 
708     RenderCommandBindIndexBuffer* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
709     if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
710         data->indexBuffer = indexBuffer;
711         if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
712             vertexInputBufferBarriers_.push_back(
713                 { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
714         }
715         renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
716     }
717 }
718 
// Begins a render pass that is fully described by this command list: every subpass
// description is provided up front and NextSubpass() advances through them.
// Per-subpass attachment resource states/layouts are derived here for the render graph.
void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    // count mismatch and nested begin both mark the command list invalid but do not abort here
    if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
        PLUGIN_LOG_E(
            "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
            renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
        stateData_.validCommandList = false;
    }
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RenderCommandList: render pass is active, needs to be end before starting a new");
        stateData_.validCommandList = false;
    }
    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(gpuResourceMgr_, renderPassDesc);
#endif
        // barrier point precedes the render pass so attachment transitions happen before it
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            PLUGIN_ASSERT(renderPassDesc.subpassCount == (uint32_t)subpassDescs.size());

            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            // clamp render area to at least 1x1
            data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
            data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
            data->subpassStartIndex = 0;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpasses.data()) {
                // NOTE(review): on allocation failure we return with renderPassHasBegun == true and a
                // barrier point already recorded but no BEGIN_RENDER_PASS command — confirm intended
                return;
            }

            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpassResourceStates.data()) {
                return;
            }

            CloneData(
                data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());

            // derive per-subpass access flags, pipeline stages, and image layouts per attachment
            for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
                const auto& subpassRef = subpassDescs[subpassIdx];

                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};

                ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}
793 
// Begins a render pass where this command list only records the subpass at
// subpassStartIdx; other subpasses of a multi-command-list render pass are recorded by
// other lists. Only the owned subpass gets its description/resource states filled in.
void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("BeginRenderPass: render pass is active, needs to be end before starting a new");
        stateData_.validCommandList = false;
    }
    if (subpassStartIdx >= renderPassDesc.subpassCount) {
        PLUGIN_LOG_E(
            "RenderCommandList: BeginRenderPass: subpassStartIdx (%u) must be smaller than renderPassDesc.subpassCount "
            "(%u)",
            subpassStartIdx, renderPassDesc.subpassCount);
        stateData_.validCommandList = false;
    }

    // a render node can contribute to only one multi-command-list render pass
    if (hasMultiRenderCommandListSubpasses_) {
        PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
        stateData_.validCommandList = false;
    } else if (renderPassDesc.subpassCount > 1) {
        hasMultiRenderCommandListSubpasses_ = true;
        multiRendercommandListSubpassCount_ = renderPassDesc.subpassCount;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = subpassStartIdx;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(gpuResourceMgr_, renderPassDesc);
#endif
        // barrier point precedes the render pass so attachment transitions happen before it
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->subpassStartIndex = subpassStartIdx;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpasses.data()) {
                // NOTE(review): on allocation failure we return with renderPassHasBegun == true and a
                // barrier point already recorded but no BEGIN_RENDER_PASS command — confirm intended
                return;
            }

            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpassResourceStates.data()) {
                return;
            }

            // only the subpass owned by this list is described; the rest stay zero-initialized
            // and are filled by the other command lists of the same render pass
            for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};
                data->subpasses[subpassIdx] = {};

                if (subpassIdx == subpassStartIdx) {
                    data->subpasses[subpassIdx] = subpassDesc;
                    ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                }
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}
873 
ProcessInputAttachments(const RenderPassDesc & renderPassDsc,const RenderPassSubpassDesc & subpassRef,RenderPassAttachmentResourceStates & subpassResourceStates)874 void RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
875     const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
876 {
877     for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
878         const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];
879 
880         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
881         // NOTE: mipLevel and layers are not updated to GpuResourceState
882         // NOTE: validation needed for invalid handles
883         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
884         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
885         refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
886         refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
887         refState.gpuQueue = gpuQueue_;
888         // if used e.g. as input and color attachment use general layout
889         if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
890             subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
891         } else {
892             subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
893                                                                  ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
894                                                                  : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
895         }
896 #if (RENDER_VALIDATION_ENABLED == 1)
897         ValidateImageUsageFlags(gpuResourceMgr_, handle, ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
898             "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
899 #endif
900     }
901 }
902 
ProcessColorAttachments(const RenderPassDesc & renderPassDsc,const RenderPassSubpassDesc & subpassRef,RenderPassAttachmentResourceStates & subpassResourceStates)903 void RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
904     const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
905 {
906     for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
907         const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];
908 
909         // NOTE: mipLevel and layers are not updated to GpuResourceState
910         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
911         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
912         refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
913         refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
914         refState.gpuQueue = gpuQueue_;
915         // if used e.g. as input and color attachment use general layout
916         subpassResourceStates.layouts[attachmentIndex] =
917             (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
918                 ? CORE_IMAGE_LAYOUT_GENERAL
919                 : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
920 
921 #if (RENDER_VALIDATION_ENABLED == 1)
922         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
923         ValidateImageUsageFlags(gpuResourceMgr_, handle, ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
924             "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
925 #endif
926     }
927 }
928 
ProcessResolveAttachments(const RenderPassDesc & renderPassDsc,const RenderPassSubpassDesc & subpassRef,RenderPassAttachmentResourceStates & subpassResourceStates)929 void RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
930     const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
931 {
932     for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
933         const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];
934 
935         // NOTE: mipLevel and layers are not updated to GpuResourceState
936         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
937         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
938         refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
939         refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
940         refState.gpuQueue = gpuQueue_;
941         subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
942 
943 #if (RENDER_VALIDATION_ENABLED == 1)
944         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
945         ValidateImageUsageFlags(gpuResourceMgr_, handle, ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
946             "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
947 #endif
948     }
949 }
950 
ProcessDepthAttachments(const RenderPassDesc & renderPassDsc,const RenderPassSubpassDesc & subpassRef,RenderPassAttachmentResourceStates & subpassResourceStates)951 void RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
952     const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
953 {
954     if (subpassRef.depthAttachmentCount == 1) {
955         const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;
956 
957         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
958         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
959         refState.accessFlags |=
960             (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
961         refState.pipelineStageFlags |=
962             (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
963         refState.gpuQueue = gpuQueue_;
964         subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
965 
966 #if (RENDER_VALIDATION_ENABLED == 1)
967         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
968         ValidateImageUsageFlags(gpuResourceMgr_, handle,
969             ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
970             "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
971 #endif
972     }
973     if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
974         const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;
975 
976         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
977         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
978         refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
979         refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
980         refState.gpuQueue = gpuQueue_;
981         subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
982 
983 #if (RENDER_VALIDATION_ENABLED == 1)
984         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
985         ValidateImageUsageFlags(gpuResourceMgr_, handle,
986             ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
987             "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
988 #endif
989     }
990 }
991 
NextSubpass(const SubpassContents & subpassContents)992 void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
993 {
994     RenderCommandNextSubpass* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
995     if (data) {
996         data->subpassContents = subpassContents;
997         data->renderCommandListIndex = 0; // will be updated in the render graph
998 
999         renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
1000     }
1001 }
1002 
EndRenderPass()1003 void RenderCommandList::EndRenderPass()
1004 {
1005     if (!stateData_.renderPassHasBegun) {
1006         PLUGIN_LOG_E("RenderCommandList: render pass needs to begin before calling end");
1007         stateData_.validCommandList = false;
1008     }
1009 
1010     RenderCommandEndRenderPass* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
1011     if (data) {
1012         // will be updated in render graph if multi render command list render pass
1013         data->endType = RenderPassEndType::END_RENDER_PASS;
1014         data->subpassStartIndex = stateData_.renderPassStartIndex;
1015         data->subpassCount = stateData_.renderPassSubpassCount;
1016 
1017         renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
1018     }
1019 
1020     stateData_.renderPassHasBegun = false;
1021     stateData_.renderPassStartIndex = 0;
1022     stateData_.renderPassSubpassCount = 0;
1023 }
1024 
BeginDisableAutomaticBarrierPoints()1025 void RenderCommandList::BeginDisableAutomaticBarrierPoints()
1026 {
1027 #if (RENDER_VALIDATION_ENABLED == 1)
1028     if (!stateData_.automaticBarriersEnabled) {
1029         PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
1030     }
1031 #endif
1032     PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);
1033 
1034     // barrier point for pending barriers
1035     AddBarrierPoint(RenderCommandType::BARRIER_POINT);
1036     stateData_.automaticBarriersEnabled = false;
1037 }
1038 
EndDisableAutomaticBarrierPoints()1039 void RenderCommandList::EndDisableAutomaticBarrierPoints()
1040 {
1041 #if (RENDER_VALIDATION_ENABLED == 1)
1042     if (stateData_.automaticBarriersEnabled) {
1043         PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
1044     }
1045 #endif
1046     PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);
1047 
1048     stateData_.automaticBarriersEnabled = true;
1049 }
1050 
AddCustomBarrierPoint()1051 void RenderCommandList::AddCustomBarrierPoint()
1052 {
1053     const bool barrierState = stateData_.automaticBarriersEnabled;
1054     stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
1055     AddBarrierPoint(RenderCommandType::BARRIER_POINT);
1056     stateData_.automaticBarriersEnabled = barrierState;
1057 }
1058 
CustomMemoryBarrier(const GeneralBarrier & source,const GeneralBarrier & destination)1059 void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
1060 {
1061 #if (RENDER_VALIDATION_ENABLED == 1)
1062     if (stateData_.renderPassHasBegun) {
1063         PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
1064     }
1065 #endif
1066 
1067     CommandBarrier cb {
1068         RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
1069         {
1070             source.accessFlags,
1071             source.pipelineStageFlags,
1072         },
1073         {},
1074         {
1075             destination.accessFlags,
1076             destination.pipelineStageFlags,
1077         },
1078         {},
1079     };
1080 
1081     customBarriers_.emplace_back(std::move(cb));
1082 
1083     stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
1084 }
1085 
CustomBufferBarrier(const RenderHandle handle,const BufferResourceBarrier & source,const BufferResourceBarrier & destination,const uint32_t byteOffset,const uint32_t byteSize)1086 void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
1087     const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
1088 {
1089     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
1090 
1091 #if (RENDER_VALIDATION_ENABLED == 1)
1092     if (stateData_.renderPassHasBegun) {
1093         PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
1094     }
1095     if (byteSize == 0) {
1096         PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
1097             "RENDER_VALIDATION: do not create zero size custom buffer barriers");
1098     }
1099     if (handleType != RenderHandleType::GPU_BUFFER) {
1100         PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
1101     }
1102 #endif
1103 
1104     if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
1105         ResourceBarrier src;
1106         src.accessFlags = source.accessFlags;
1107         src.pipelineStageFlags = source.pipelineStageFlags;
1108         src.optionalByteOffset = byteOffset;
1109         src.optionalByteSize = byteSize;
1110 
1111         ResourceBarrier dst;
1112         dst.accessFlags = destination.accessFlags;
1113         dst.pipelineStageFlags = destination.pipelineStageFlags;
1114         dst.optionalByteOffset = byteOffset;
1115         dst.optionalByteSize = byteSize;
1116 
1117         CommandBarrier cb {
1118             handle,
1119             std::move(src),
1120             {},
1121             std::move(dst),
1122             {},
1123         };
1124 
1125         customBarriers_.emplace_back(std::move(cb));
1126 
1127         stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
1128     }
1129 }
1130 
CustomImageBarrier(const RenderHandle handle,const ImageResourceBarrier & destination,const ImageSubresourceRange & imageSubresourceRange)1131 void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
1132     const ImageSubresourceRange& imageSubresourceRange)
1133 {
1134     // specific layout MAX_ENUM to state that we fetch the correct state
1135     ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
1136     CustomImageBarrier(handle, source, destination, imageSubresourceRange);
1137 }
1138 
CustomImageBarrier(const RenderHandle handle,const ImageResourceBarrier & source,const ImageResourceBarrier & destination,const ImageSubresourceRange & imageSubresourceRange)1139 void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
1140     const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
1141 {
1142     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
1143 
1144 #if (RENDER_VALIDATION_ENABLED == 1)
1145     if (stateData_.renderPassHasBegun) {
1146         PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
1147     }
1148     if (handleType != RenderHandleType::GPU_IMAGE) {
1149         PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
1150     }
1151     ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
1152 #endif
1153 
1154     if (handleType == RenderHandleType::GPU_IMAGE) {
1155         ResourceBarrier src;
1156         src.accessFlags = source.accessFlags;
1157         src.pipelineStageFlags = source.pipelineStageFlags;
1158         src.optionalImageLayout = source.imageLayout;
1159         src.optionalImageSubresourceRange = imageSubresourceRange;
1160 
1161         ResourceBarrier dst;
1162         dst.accessFlags = destination.accessFlags;
1163         dst.pipelineStageFlags = destination.pipelineStageFlags;
1164         dst.optionalImageLayout = destination.imageLayout;
1165         dst.optionalImageSubresourceRange = imageSubresourceRange;
1166 
1167         CommandBarrier cb {
1168             handle,
1169             std::move(src),
1170             {},
1171             std::move(dst),
1172             {},
1173         };
1174 
1175         customBarriers_.emplace_back(std::move(cb));
1176 
1177         stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
1178     }
1179 }
1180 
CopyBufferToBuffer(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const BufferCopy & bufferCopy)1181 void RenderCommandList::CopyBufferToBuffer(
1182     const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
1183 {
1184     if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
1185         // NOTE: combine copies, and only single combined barrier?
1186         if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1187             RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1188             AddBarrierPoint(RenderCommandType::COPY_BUFFER);
1189         }
1190 
1191         RenderCommandCopyBuffer* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
1192         if (data) {
1193             data->srcHandle = sourceHandle;
1194             data->dstHandle = destinationHandle;
1195             data->bufferCopy = bufferCopy;
1196 
1197             renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
1198         }
1199     } else {
1200         PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
1201     }
1202 }
1203 
CopyBufferToImage(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const BufferImageCopy & bufferImageCopy)1204 void RenderCommandList::CopyBufferToImage(
1205     const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
1206 {
1207     if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
1208         // NOTE: combine copies, and only single combined barrier?
1209         if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1210             RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1211             AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
1212         }
1213 
1214         RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
1215         if (data) {
1216             data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
1217             data->srcHandle = sourceHandle;
1218             data->dstHandle = destinationHandle;
1219             data->bufferImageCopy = bufferImageCopy;
1220 
1221             renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
1222         }
1223     } else {
1224         PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
1225     }
1226 }
1227 
CopyImageToBuffer(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const BufferImageCopy & bufferImageCopy)1228 void RenderCommandList::CopyImageToBuffer(
1229     const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
1230 {
1231     if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
1232         // NOTE: combine copies, and only single combined barrier?
1233         if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1234             RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1235             AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
1236         }
1237 
1238         RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
1239         if (data) {
1240             data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
1241             data->srcHandle = sourceHandle;
1242             data->dstHandle = destinationHandle;
1243             data->bufferImageCopy = bufferImageCopy;
1244 
1245             renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
1246         }
1247     } else {
1248         PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
1249     }
1250 }
1251 
CopyImageToImage(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const ImageCopy & imageCopy)1252 void RenderCommandList::CopyImageToImage(
1253     const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
1254 {
1255     if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
1256         // NOTE: combine copies, and only single combined barrier?
1257         if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1258             RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1259             AddBarrierPoint(RenderCommandType::COPY_IMAGE);
1260         }
1261 
1262         RenderCommandCopyImage* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
1263         if (data) {
1264             data->srcHandle = sourceHandle;
1265             data->dstHandle = destinationHandle;
1266             data->imageCopy = imageCopy;
1267 
1268             renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
1269         }
1270     } else {
1271         PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
1272     }
1273 }
1274 
BlitImage(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const ImageBlit & imageBlit,const Filter filter)1275 void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
1276     const ImageBlit& imageBlit, const Filter filter)
1277 {
1278     if (!stateData_.renderPassHasBegun) {
1279         if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
1280             if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1281                 RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1282                 AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
1283             }
1284 
1285             RenderCommandBlitImage* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
1286             if (data) {
1287                 data->srcHandle = sourceHandle;
1288                 data->dstHandle = destinationHandle;
1289                 data->imageBlit = imageBlit;
1290                 data->filter = filter;
1291                 // NOTE: desired layouts (barrier point needs to respect these)
1292                 data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1293                 data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1294 
1295                 renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
1296             }
1297         }
1298     } else {
1299         PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of render pass");
1300     }
1301 }
1302 
UpdateDescriptorSet(const RenderHandle handle,const DescriptorSetLayoutBindingResources & bindingResources)1303 void RenderCommandList::UpdateDescriptorSet(
1304     const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
1305 {
1306 #if (RENDER_VALIDATION_ENABLED == 1)
1307     ValidateDescriptorTypeBinding(gpuResourceMgr_, bindingResources);
1308 #endif
1309 #if (RENDER_VALIDATION_ENABLED == 1)
1310     if (bindingResources.bindingMask != bindingResources.descriptorSetBindingMask) {
1311         PLUGIN_LOG_E("RENDER_VALIDATION: invalid bindings in descriptor set update");
1312     }
1313 #endif
1314     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
1315     if (handleType == RenderHandleType::DESCRIPTOR_SET) {
1316         nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handle, bindingResources, gpuQueue_);
1317         RenderCommandUpdateDescriptorSets* data = AllocateRenderCommand<RenderCommandUpdateDescriptorSets>(allocator_);
1318         if (data) {
1319             *data = {}; // default
1320             data->descriptorSetHandles[0] = handle;
1321 
1322             renderCommands_.push_back({ RenderCommandType::UPDATE_DESCRIPTOR_SETS, data });
1323         }
1324     } else {
1325         PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
1326     }
1327 }
1328 
// Binds descriptor sets [firstSet, firstSet + handles.size()) for the currently bound PSO and
// records the barrier bookkeeping needed for sets that reference dynamic-barrier resources.
// dynamicOffsets are copied into command-list-owned memory (the caller's view may be transient).
void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles,
    const array_view<const uint32_t> dynamicOffsets)
{
    // Hard reject when the requested range would exceed the pipeline layout set limit.
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(handles.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RenderCommandList::BindDescriptorSets: firstSet + handles.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    // A PSO must already be bound; otherwise the whole command list is flagged invalid.
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if ((handles.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) ||
        (dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
#endif

    if (auto* data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
        *data = {}; // default

        // Deep-copy dynamic offsets into the command allocator so they outlive this call.
        // On allocation failure dynamicOffsetCount stays 0 from the default init above.
        if (!dynamicOffsets.empty()) {
            if (auto* doData = AllocateRenderData(allocator_, dynamicOffsets.size() * sizeof(uint32_t)); doData) {
                data->dynamicOffsets = reinterpret_cast<uint32_t*>(doData);
                data->dynamicOffsetCount = static_cast<uint32_t>(dynamicOffsets.size());
                CloneData(data->dynamicOffsets, dynamicOffsets.size_bytes(), dynamicOffsets.data(),
                    dynamicOffsets.size_bytes());
            }
        }

        data->psoHandle = stateData_.currentPsoHandle;
        data->firstSet = firstSet;
        data->setCount = static_cast<uint32_t>(handles.size());

        uint32_t descriptorSetCounterForBarriers = 0;
        uint32_t currSet = firstSet;
        for (const RenderHandle currHandle : handles) {
            PLUGIN_ASSERT(currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
            data->descriptorSetHandles[currSet] = currHandle;

            // Sets with dynamic-barrier resources bound inside a render pass are collected so the
            // barrier point created before the pass can account for them.
            const bool hasDynamicBarrierResources =
                nodeContextDescriptorSetManager_.HasDynamicBarrierResources(currHandle);
            if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                descriptorSetHandlesForBarriers_.emplace_back(currHandle);
                descriptorSetCounterForBarriers++;
            }
            // Track currently bound sets for later pipeline-layout validation (ValidatePipelineLayout).
            stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
            stateData_.currentBoundSets[currSet].descriptorSetHandle = currHandle;
            stateData_.currentBoundSetsMask |= (1 << currSet);
            ++currSet;
        }

        renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });

        if (stateData_.renderPassHasBegun) { // add possible barriers before render pass
            PLUGIN_ASSERT(stateData_.currentBarrierPoint);
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            // Outside a render pass: defer barrier evaluation until the next barrier point is created.
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}
1392 
BindDescriptorSets(const uint32_t firstSet,const array_view<const RenderHandle> handles)1393 void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
1394 {
1395     BindDescriptorSets(firstSet, handles, {});
1396 }
1397 
BindDescriptorSet(const uint32_t set,const RenderHandle handle)1398 void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
1399 {
1400     BindDescriptorSets(set, array_view<const RenderHandle>(&handle, 1), {});
1401 }
1402 
BindDescriptorSet(const uint32_t set,const RenderHandle handle,const array_view<const uint32_t> dynamicOffsets)1403 void RenderCommandList::BindDescriptorSet(
1404     const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
1405 {
1406     BindDescriptorSets(set, array_view<const RenderHandle>(&handle, 1), dynamicOffsets);
1407 }
1408 
BuildAccelerationStructures(const AccelerationStructureBuildGeometryData & geometry,const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)1409 void RenderCommandList::BuildAccelerationStructures(const AccelerationStructureBuildGeometryData& geometry,
1410     const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,
1411     const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,
1412     const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)
1413 {
1414     if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
1415 #if (RENDER_VULKAN_RT_ENABLED == 1)
1416         RenderCommandBuildAccelerationStructure* data =
1417             AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
1418         if (data) {
1419             data->type = geometry.info.type;
1420             data->flags = geometry.info.flags;
1421             data->mode = geometry.info.mode;
1422             data->srcAccelerationStructure = geometry.srcAccelerationStructure;
1423             data->dstAccelerationStructure = geometry.dstAccelerationStructure;
1424             data->scratchBuffer = geometry.scratchBuffer.handle;
1425             data->scratchOffset = geometry.scratchBuffer.offset;
1426 
1427             if (!triangles.empty()) {
1428                 AccelerationStructureGeometryTrianglesData* trianglesData =
1429                     static_cast<AccelerationStructureGeometryTrianglesData*>(AllocateRenderData(
1430                         allocator_, sizeof(AccelerationStructureGeometryTrianglesData) * triangles.size()));
1431                 data->trianglesData = trianglesData;
1432                 data->trianglesView = { data->trianglesData, triangles.size() };
1433                 for (size_t idx = 0; idx < triangles.size(); ++idx) {
1434                     data->trianglesView[idx] = triangles[idx];
1435                 }
1436             }
1437             if (!aabbs.empty()) {
1438                 AccelerationStructureGeometryAabbsData* aabbsData =
1439                     static_cast<AccelerationStructureGeometryAabbsData*>(
1440                         AllocateRenderData(allocator_, sizeof(AccelerationStructureGeometryAabbsData) * aabbs.size()));
1441                 data->aabbsData = aabbsData;
1442                 data->aabbsView = { data->aabbsData, aabbs.size() };
1443                 for (size_t idx = 0; idx < aabbs.size(); ++idx) {
1444                     data->aabbsView[idx] = aabbs[idx];
1445                 }
1446             }
1447             if (!instances.empty()) {
1448                 AccelerationStructureGeometryInstancesData* instancesData =
1449                     static_cast<AccelerationStructureGeometryInstancesData*>(AllocateRenderData(
1450                         allocator_, sizeof(AccelerationStructureGeometryInstancesData) * instances.size()));
1451                 data->instancesData = instancesData;
1452                 data->instancesView = { data->instancesData, instances.size() };
1453                 for (size_t idx = 0; idx < instances.size(); ++idx) {
1454                     data->instancesView[idx] = instances[idx];
1455                 }
1456             }
1457             renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
1458         }
1459 #endif
1460     }
1461 }
1462 
SetDynamicStateViewport(const ViewportDesc & viewportDesc)1463 void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
1464 {
1465 #if (RENDER_VALIDATION_ENABLED == 1)
1466     ValidateViewport(viewportDesc);
1467 #endif
1468     RenderCommandDynamicStateViewport* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
1469     if (data) {
1470         data->viewportDesc = viewportDesc;
1471         data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
1472         data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
1473         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
1474     }
1475 }
1476 
SetDynamicStateScissor(const ScissorDesc & scissorDesc)1477 void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
1478 {
1479 #if (RENDER_VALIDATION_ENABLED == 1)
1480     ValidateScissor(scissorDesc);
1481 #endif
1482     RenderCommandDynamicStateScissor* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
1483     if (data) {
1484         data->scissorDesc = scissorDesc;
1485         data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
1486         data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
1487         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
1488     }
1489 }
1490 
SetDynamicStateLineWidth(const float lineWidth)1491 void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
1492 {
1493     RenderCommandDynamicStateLineWidth* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
1494     if (data) {
1495         data->lineWidth = lineWidth;
1496         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
1497     }
1498 }
1499 
SetDynamicStateDepthBias(const float depthBiasConstantFactor,const float depthBiasClamp,const float depthBiasSlopeFactor)1500 void RenderCommandList::SetDynamicStateDepthBias(
1501     const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
1502 {
1503     RenderCommandDynamicStateDepthBias* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
1504     if (data) {
1505         data->depthBiasConstantFactor = depthBiasConstantFactor;
1506         data->depthBiasClamp = depthBiasClamp;
1507         data->depthBiasSlopeFactor = depthBiasSlopeFactor;
1508         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
1509     }
1510 }
1511 
SetDynamicStateBlendConstants(const array_view<const float> blendConstants)1512 void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
1513 {
1514     constexpr uint32_t THRESHOLD = 4;
1515 #if (RENDER_VALIDATION_ENABLED == 1)
1516     if (blendConstants.size() > THRESHOLD) {
1517         PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
1518             THRESHOLD);
1519     }
1520 #endif
1521     RenderCommandDynamicStateBlendConstants* data =
1522         AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
1523     if (data) {
1524         *data = {};
1525         const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), THRESHOLD);
1526         for (uint32_t idx = 0; idx < bcCount; ++idx) {
1527             data->blendConstants[idx] = blendConstants[idx];
1528         }
1529         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
1530     }
1531 }
1532 
SetDynamicStateDepthBounds(const float minDepthBounds,const float maxDepthBounds)1533 void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
1534 {
1535     RenderCommandDynamicStateDepthBounds* data =
1536         AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
1537     if (data) {
1538         data->minDepthBounds = minDepthBounds;
1539         data->maxDepthBounds = maxDepthBounds;
1540         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
1541     }
1542 }
1543 
SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask,const uint32_t compareMask)1544 void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
1545 {
1546     RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
1547     if (data) {
1548         data->dynamicState = StencilDynamicState::COMPARE_MASK;
1549         data->faceMask = faceMask;
1550         data->mask = compareMask;
1551         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
1552     }
1553 }
1554 
SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask,const uint32_t writeMask)1555 void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
1556 {
1557     RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
1558     if (data) {
1559         data->dynamicState = StencilDynamicState::WRITE_MASK;
1560         data->faceMask = faceMask;
1561         data->mask = writeMask;
1562         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
1563     }
1564 }
1565 
SetDynamicStateStencilReference(const StencilFaceFlags faceMask,const uint32_t reference)1566 void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
1567 {
1568     RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
1569     if (data) {
1570         data->dynamicState = StencilDynamicState::REFERENCE;
1571         data->faceMask = faceMask;
1572         data->mask = reference;
1573         renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
1574     }
1575 }
1576 
SetExecuteBackendFramePosition()1577 void RenderCommandList::SetExecuteBackendFramePosition()
1578 {
1579     if (stateData_.executeBackendFrameSet == false) {
1580         AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);
1581 
1582         RenderCommandExecuteBackendFramePosition* data =
1583             AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
1584         if (data) {
1585             data->id = 0;
1586             renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
1587             stateData_.executeBackendFrameSet = true;
1588         }
1589     } else {
1590         PLUGIN_LOG_E("RenderCommandList: there can be only one SetExecuteBackendFramePosition() -call per frame");
1591     }
1592 }
1593 
ValidatePipeline()1594 void RenderCommandList::ValidatePipeline()
1595 {
1596     if (!stateData_.validPso) {
1597         stateData_.validCommandList = false;
1598         PLUGIN_LOG_E("RenderCommandList: pso not bound");
1599     }
1600 }
1601 
ValidatePipelineLayout()1602 void RenderCommandList::ValidatePipelineLayout()
1603 {
1604     if (stateData_.checkBindPipelineLayout) {
1605         stateData_.checkBindPipelineLayout = false;
1606         // fast check without validation
1607         const uint32_t pipelineLayoutSetsMask =
1608             RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
1609         if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
1610             PLUGIN_LOG_ONCE_E(
1611                 "RenderCommandList::ValidatePipelineLayout", "RenderCommandList: not all needed descriptor sets bound");
1612         }
1613 #if (RENDER_VALIDATION_ENABLED == 1)
1614         const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
1615         const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
1616                                        ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
1617                                        : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
1618         const uint32_t plDescriptorSetCount = pl.descriptorSetCount;
1619         uint32_t bindCount = 0;
1620         uint32_t bindSetIndices[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT] { ~0u, ~0u, ~0u, ~0u };
1621         for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
1622             const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
1623             if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
1624                 bindCount++;
1625                 bindSetIndices[idx] = idx;
1626             }
1627         }
1628         if (bindCount < plDescriptorSetCount) {
1629             PLUGIN_LOG_E("RENDER_VALIDATION: not all pipeline layout required descriptor sets bound");
1630         }
1631 #endif
1632     }
1633 }
1634 RENDER_END_NAMESPACE()
1635