/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_command_list.h"

#include <cinttypes>
#include <cstdint>

#include <base/containers/array_view.h>
#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_command_list.h>
#include <render/render_data_structures.h>

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "util/linear_allocator.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
PLUGIN_STATIC_ASSERT(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT == 4);
PLUGIN_STATIC_ASSERT(PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT == 8u);
namespace {
#if (RENDER_VALIDATION_ENABLED == 1)
void ValidateImageUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const ImageUsageFlags imageUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetImageDescriptor(handle).usageFlags & imageUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateImageUsageFlags_",
            "RENDER_VALIDATION: gpu image (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateBufferUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const BufferUsageFlags bufferUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetBufferDescriptor(handle).usageFlags & bufferUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateBufferUsageFlags_",
            "RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateDescriptorTypeBinding(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const DescriptorSetLayoutBindingResources& bindingRes)
{
    for (const auto& ref : bindingRes.buffers) {
        if (!RenderHandleUtil::IsGpuBuffer(ref.resource.handle)) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid GPU buffer");
        }
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
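            // intentionally empty: acceleration structures have no buffer usage flags to validate here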
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.images) {
        if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
            (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT,
                "CORE_IMAGE_USAGE_SAMPLED_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT,
                "CORE_IMAGE_USAGE_STORAGE_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.samplers) {
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
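            // intentionally empty: plain samplers carry no usage flags to validate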
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
        }
    }
}

void ValidateRenderPassAttachment(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
    const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[0]);
    const uint32_t baseWidth = baseDesc.width;
    const uint32_t baseHeight = baseDesc.height;
    // NOTE: we do not check fragment shading rate attachment size
    for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDesc.attachmentCount; ++attachmentIdx) {
        const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[attachmentIdx]);
        if (desc.width != baseWidth || desc.height != baseHeight) {
            for (const auto& subpassRef : subpassDescs) {
                auto CheckAttachments = [](const auto& indices, const uint32_t count, const uint32_t attachmentIndex) {
                    for (uint32_t idx = 0; idx < count; ++idx) {
                        if (indices[idx] == attachmentIndex) {
                            return false;
                        }
                    }
                    return true;
                };
                bool valid = true;
                valid &=
                    CheckAttachments(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount, attachmentIdx);
                valid &=
                    CheckAttachments(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount, attachmentIdx);
                valid &= CheckAttachments(
                    subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount, attachmentIdx);
                if ((subpassRef.depthAttachmentIndex == attachmentIdx) ||
                    (subpassRef.depthResolveAttachmentIndex == attachmentIdx)) {
                    valid = false;
                }
                if (!valid) {
                    if (RenderHandleUtil::IsSwapchain(renderPassDesc.attachmentHandles[attachmentIdx]) &&
                        RenderHandleUtil::IsDepthImage(renderPassDesc.attachmentHandles[0])) {
                        PLUGIN_LOG_ONCE_W(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: Depth and swapchain input mismatch: baseWidth:%u baseHeight:%u "
                            "currWidth:%u currHeight:%u",
                            baseWidth, baseHeight, desc.width, desc.height);
                    } else {
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: render pass attachment size does not match with attachment index: %u",
                            attachmentIdx);
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
                            baseHeight, desc.width, desc.height);
                    }
                }
            }
        }
    }
    if ((renderPassDesc.renderArea.extentWidth == 0) || (renderPassDesc.renderArea.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaExtent_",
            "RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u) (node: %s)",
            renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight, nodeName.data());
    }
    if ((renderPassDesc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
        (renderPassDesc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaOffset_",
            "RENDER_VALIDATION: render area offset cannot go out of screen (offsetX: %i, offsetY: %i) (baseWidth: "
            "%u, baseHeight: %u) (node: %s)",
            renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY, baseWidth, baseHeight,
            nodeName.data());
    }
}

void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageSubresourceRange& imageSubresourceRange)
{
    const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
    if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseMipLevel: %u is greater than or equal to mipCount: %u",
            imageSubresourceRange.baseMipLevel, desc.mipCount);
    }
    if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseArrayLayer: %u is greater than or equal to layerCount: %u",
            imageSubresourceRange.baseArrayLayer, desc.layerCount);
    }
}

void ValidateViewport(const string_view nodeName, const ViewportDesc& vd)
{
    if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateViewport_",
            "RENDER_VALIDATION: viewport width (%f) and height (%f) must be one or larger (node: %s)", vd.width,
            vd.height, nodeName.data());
    }
}

void ValidateScissor(const string_view nodeName, const ScissorDesc& sd)
{
    if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateScissor_",
            "RENDER_VALIDATION: scissor extentWidth (%u) and extentHeight (%u) cannot be zero (node: %s)",
            sd.extentWidth, sd.extentHeight, nodeName.data());
    }
}

void ValidateFragmentShadingRate(const Size2D& size)
{
    bool valid = true;
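    // allowed fragment sizes are powers of two in [1, 4]: 1, 2, or 4 per dimension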
    if ((size.width == 0) || (size.height == 0)) {
        valid = false;
    } else if ((size.width == 3u) || (size.height == 3u)) {
        valid = false;
    } else if ((size.width > 4u) || (size.height > 4u)) {
        valid = false;
    }
    if (!valid) {
        PLUGIN_LOG_W("RENDER_VALIDATION: fragmentSize must be less than or equal to 4 and the value must be a "
                     "power of two (width = %u, height = %u)",
            size.width, size.height);
    }
}
#endif // RENDER_VALIDATION_ENABLED

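// sentinel for an unset render-command index (see MultiRenderPassCommandListData)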
constexpr uint32_t INVALID_CL_IDX { ~0u };

constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barriers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };

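// rounds byteSize up to the next multiple of alignment (alignment must be a power of two)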
size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

void* AllocateRenderData(
    RenderCommandList::LinearAllocatorStruct& allocator, const size_t alignment, const size_t byteSz)
{
    PLUGIN_ASSERT(byteSz > 0);
    void* rc = nullptr;
    if (!allocator.allocators.empty()) {
        const size_t currentIndex = allocator.allocators.size() - 1;
        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
    }

    if (rc == nullptr) { // current allocator is out of memory
        size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
        const size_t currentIndex = allocator.allocators.size();
        if (currentIndex > 0) {
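            // grow geometrically: the new allocator is at least twice the size of the previous one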
            allocatorByteSize =
                Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
        }
        allocator.allocators.push_back(make_unique<LinearAllocator>(allocatorByteSize));

        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
        if (rc == nullptr) {
            PLUGIN_LOG_E("RenderCommandList: render command list allocation: out of memory");
            PLUGIN_ASSERT(false);
        }
    }
    return rc;
}

template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T) * count));
}

template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T)));
}
} // namespace

RenderCommandList::RenderCommandList(const BASE_NS::string_view nodeName,
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr, const GpuResourceManager& gpuResourceMgr,
    const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue, const bool enableMultiQueue)
    : IRenderCommandList(), nodeName_(nodeName),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}

void RenderCommandList::BeginFrame()
{
    if (allocator_.allocators.size() == 1) { // size is good for this frame
        allocator_.allocators[0]->Reset();
    } else if (allocator_.allocators.size() > 1) {
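        // multiple allocators were needed last frame: replace them with one combined allocator sized for the whole frame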
        size_t fullByteSize = 0;
        size_t alignment = 0;
        for (auto& ref : allocator_.allocators) {
            fullByteSize += ref->GetCurrentByteSize();
            alignment = Math::max(alignment, (size_t)ref->GetAlignment());
            ref.reset();
        }
        allocator_.allocators.clear();

        // add some room for current frame allocation for new render commands
        const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
        fullByteSize += extraBytes;

        // create new single allocation for combined previous size and some extra bytes
        const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
        allocator_.allocators.push_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
    }

    ResetStateData();

    const auto clearAndReserve = [](auto& vec) {
        const size_t count = vec.size();
        vec.clear();
        vec.reserve(count);
    };

    clearAndReserve(renderCommands_);
    clearAndReserve(customBarriers_);
    clearAndReserve(rpVertexInputBufferBarriers_);
    clearAndReserve(rpIndirectBufferBarriers_);
    clearAndReserve(descriptorSetHandlesForBarriers_);
    clearAndReserve(descriptorSetHandlesForUpdates_);

    validReleaseAcquire_ = false;
    hasMultiRpCommandListSubpasses_ = false;
    multiRpCommandListData_ = {};
    hasGlobalDescriptorSetBindings_ = false;
}

void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
{
    if (enableMultiQueue_) {
        validReleaseAcquire_ = true;
    }
}

void RenderCommandList::BeforeRenderNodeExecuteFrame()
{
    // add possible barrier point for gpu queue transfer acquire
    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
    }
}

void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    for (uint32_t idx = debugMarkerStack_.stackCounter; idx > 0U; --idx) {
        EndDebugMarker();
    }
#endif
}

array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
{
    if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_GetRenderCommands_",
            "RenderCommandList: invalid state data in render command list (node: %s)", nodeName_.c_str());
#endif
        return {};
    } else {
        return { renderCommands_.data(), renderCommands_.size() };
    }
}

bool RenderCommandList::HasValidRenderCommands() const
{
    const uint32_t renderCommandCount = GetRenderCommandCount();
    bool valid = false;
    if (enableMultiQueue_) {
        if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
            // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
            valid = validReleaseAcquire_;
        } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
            valid = true;
        }
    } else {
        valid = (renderCommandCount > 0);
    }
    valid = valid && stateData_.validCommandList;

    return valid;
}

uint32_t RenderCommandList::GetRenderCommandCount() const
{
    return (uint32_t)renderCommands_.size();
}

GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}

bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRpCommandListSubpasses_;
}

MultiRenderPassCommandListData RenderCommandList::GetMultiRenderCommandListData() const
{
    return multiRpCommandListData_;
}

bool RenderCommandList::HasGlobalDescriptorSetBindings() const
{
    return hasGlobalDescriptorSetBindings_;
}

array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
{
    return { customBarriers_.data(), customBarriers_.size() };
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassVertexInputBufferBarriers() const
{
    return { rpVertexInputBufferBarriers_.data(), rpVertexInputBufferBarriers_.size() };
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassIndirectBufferBarriers() const
{
    return { rpIndirectBufferBarriers_.data(), rpIndirectBufferBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
{
    return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetUpdateDescriptorSetHandles() const
{
    return { descriptorSetHandlesForUpdates_.data(), descriptorSetHandlesForUpdates_.size() };
}

void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    auto* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (!data) {
        return; // early out
    }
    *data = {}; // zero initialize

    data->renderCommandType = renderCommandType;
    data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

    // update new index (within render pass there might not be any dirty descriptor sets at this stage)
    const auto descriptorSetBeginIndex = static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size());
    data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
    data->descriptorSetHandleCount = 0U;
    // update new index (only valid with render pass)
    data->vertexIndexBarrierIndexBegin = static_cast<uint32_t>(rpVertexInputBufferBarriers_.size());
    data->vertexIndexBarrierCount = 0U;
    // update new index (only valid with render pass)
    data->indirectBufferBarrierIndexBegin = static_cast<uint32_t>(rpIndirectBufferBarriers_.size());
    data->indirectBufferBarrierCount = 0U;

    // barriers are always needed e.g. when a dynamic resource is bound for writing in multiple dispatches
    const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                      renderCommandType == RenderCommandType::DISPATCH ||
                                      renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
    if (handleDescriptorSets) {
        stateData_.dirtyDescriptorSetsForBarriers = false;
        for (auto& currentBoundSet : stateData_.currentBoundSets) {
            // only add descriptor set handles for barriers if there are dynamic barrier resources
            if (currentBoundSet.hasDynamicBarrierResources) {
                descriptorSetHandlesForBarriers_.push_back(currentBoundSet.descriptorSetHandle);
            }
        }
        data->descriptorSetHandleCount = (uint32_t)descriptorSetHandlesForBarriers_.size() - descriptorSetBeginIndex;
    }

    const bool handleCustomBarriers =
        ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
    if (handleCustomBarriers) {
        const int32_t newCount = (int32_t)customBarriers_.size() - stateData_.currentCustomBarrierIndices.prevSize;
        if (newCount > 0) {
            data->customBarrierIndexBegin = (uint32_t)stateData_.currentCustomBarrierIndices.prevSize;
            data->customBarrierCount = (uint32_t)newCount;

            stateData_.currentCustomBarrierIndices.prevSize = (int32_t)customBarriers_.size();
            stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
        }
    }

    // store current barrier point for render command list
    // * binding descriptor sets (with dynamic barrier resources)
    // * binding vertex and index buffers (with dynamic barrier resources)
    // * indirect args buffer (with dynamic barrier resources)
    // inside a render pass these add barriers directly to the RenderCommandBarrierPoint behind this pointer
    stateData_.currentBarrierPoint = data;

    renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
}

void RenderCommandList::Draw(
    const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_Draw_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw)");
    }
#endif

    if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW;
            data->vertexCount = vertexCount;
            data->instanceCount = instanceCount;
            data->firstVertex = firstVertex;
            data->firstInstance = firstInstance;
            data->indexCount = 0;
            data->firstIndex = 0;
            data->vertexOffset = 0;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
    const int32_t vertexOffset, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DrawIndexed_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw).");
    }
#endif

    if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED;
            data->vertexCount = 0;
            data->instanceCount = instanceCount;
            data->firstVertex = 0;
            data->firstInstance = firstInstance;
            data->indexCount = indexCount;
            data->firstIndex = firstIndex;
            data->vertexOffset = vertexOffset;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DI_buffer_", "RENDER_VALIDATION: DrawIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
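                // barrier covers a single indirect draw record (vertexCount, instanceCount, firstVertex, firstInstance)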
                constexpr uint32_t drawIndirectCommandSize { 4U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::DrawIndexedIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DII_buffer_", "RENDER_VALIDATION: DrawIndexedIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
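                // one indexed indirect record (indexCount, instanceCount, firstIndex, vertexOffset, firstInstance)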
                constexpr uint32_t drawIndirectCommandSize { 5U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
{
    if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
        ValidatePipeline();
        ValidatePipelineLayout();

        AddBarrierPoint(RenderCommandType::DISPATCH);

        auto* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
        if (data) {
            data->groupCountX = groupCountX;
            data->groupCountY = groupCountY;
            data->groupCountZ = groupCountZ;

            renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
        }
    }
}

void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
{
    ValidatePipeline();
    ValidatePipelineLayout();

    AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);

    auto* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
    if (data) {
        data->argsHandle = bufferHandle;
        data->offset = offset;

        renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
    }
}

void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    // NOTE: we cannot early out with the same pso handle
    // the render pass and its hashes might have changed
    // the final pso needs to be hashed with the final render pass
    // the backends try to check the re-binding of the same pipeline
    // another approach would be to check when the render pass changes and re-bind psos if needed

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_BindPipeline_",
                "RENDER_VALIDATION: RenderCommandList: bind pipeline after render pass begin.");
        }
    }
#endif

    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    auto* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}

void RenderCommandList::PushConstantData(
    const RENDER_NS::PushConstant& pushConstant, const BASE_NS::array_view<const uint8_t> data)
{
    ValidatePipeline();

    // push constant is not used/allocated if byte size is bigger than supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && (!data.empty())) {
        auto* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        // use alignment of uint32 as currently the push constants are uint32s
        // the data is allocated according to shader/pipeline needs
        uint8_t* pushData =
            static_cast<uint8_t*>(AllocateRenderData(allocator_, std::alignment_of<uint32_t>(), pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            // the max amount of visible data is copied
            const size_t minData = Math::min(static_cast<size_t>(pushConstant.byteSize), data.size_bytes());
            CloneData(rc->data, pushConstant.byteSize, data.data(), minData);

            renderCommands_.push_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be smaller than or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}

void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    if ((pushConstant.byteSize > 0) && data) {
        PushConstantData(pushConstant, { data, pushConstant.byteSize });
    }
}

void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION: max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (vertexBuffers.empty()) {
        return; // early out
    }
    auto* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
    if (!data) {
        return; // early out
    }

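    // collect dynamic-resource vertex buffers so barriers can be recorded before the render pass begins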
    VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    uint32_t dynamicBarrierVertexBufferCount = 0;
    const uint32_t vertexBufferCount =
        Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, (uint32_t)vertexBuffers.size());
    data->vertexBufferCount = vertexBufferCount;
    RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
    for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
        data->vertexBuffers[idx] = vertexBuffers[idx];
        const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
        if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
            (vertexBuffers[idx].byteSize > 0)) {
            // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
            dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
                PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
            previousVbHandle = currVbHandle;
        }
    }

    // add possible vertex/index buffer barriers before render pass
    if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
        PLUGIN_ASSERT(stateData_.currentBarrierPoint);
        stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
        const size_t currCount = rpVertexInputBufferBarriers_.size();
        rpVertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
        for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
            rpVertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
        }
    }

    renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
}

void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
{
    ValidatePipeline();

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
    }
#endif

    auto* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
    if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
        data->indexBuffer = indexBuffer;
        if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
            stateData_.currentBarrierPoint->vertexIndexBarrierCount++;
            rpVertexInputBufferBarriers_.push_back(
                { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
        }
        renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif
    if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_subpass_",
            "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
            renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
#endif
        stateData_.validCommandList = false;
    }
    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount == 0) {
        return;
    }
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, subpassDescs);
#endif
    AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

    if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
        // NOTE: hashed in the backend
        PLUGIN_ASSERT(renderPassDesc.subpassCount == (uint32_t)subpassDescs.size());

        data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
        data->renderPassDesc = renderPassDesc;
        data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
        data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
        data->subpassStartIndex = 0;
        // if false -> initial layout is undefined
        data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

        data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
            renderPassDesc.subpassCount };
        data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                            allocator_, renderPassDesc.subpassCount),
            renderPassDesc.subpassCount };
        if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
            return;
        }

        CloneData(data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());

        bool valid = true;
        for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
            const auto& subpassRef = subpassDescs[subpassIdx];

            RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
            subpassResourceStates = {};

            valid = valid && ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessFragmentShadingRateAttachments(renderPassDesc, subpassRef, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
            data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
        }
        if (!valid) {
            stateData_.validCommandList = false;
        }

        // render pass layouts will be updated by render graph
        renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (subpassStartIdx >= renderPassDesc.subpassCount) {
        PLUGIN_LOG_E("RCL:BeginRenderPass: subpassStartIdx (%u) must be smaller than renderPassDesc.subpassCount (%u)",
            subpassStartIdx, renderPassDesc.subpassCount);
        stateData_.validCommandList = false;
    }

    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = subpassStartIdx;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, { &subpassDesc, 1u });
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (hasMultiRpCommandListSubpasses_) {
            PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
            stateData_.validCommandList = false;
        } else if (renderPassDesc.subpassCount > 1) {
            hasMultiRpCommandListSubpasses_ = true;
            multiRpCommandListData_.secondaryCmdLists =
                (renderPassDesc.subpassContents == CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS);
            if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BARRIER_POINT)) {
                multiRpCommandListData_.rpBarrierCmdIndex = static_cast<uint32_t>(renderCommands_.size()) - 1u;
            }
        }
        multiRpCommandListData_.subpassCount = renderPassDesc.subpassCount;
        multiRpCommandListData_.rpBeginCmdIndex = static_cast<uint32_t>(renderCommands_.size());

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->subpassStartIndex = subpassStartIdx;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
                return;
            }

            bool valid = true;
            for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};
                data->subpasses[subpassIdx] = {};

                if (subpassIdx == subpassStartIdx) {
                    data->subpasses[subpassIdx] = subpassDesc;
                    valid = valid && ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid &&
                            ProcessFragmentShadingRateAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
                    data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
                }
            }
            if (!valid) {
                stateData_.validCommandList = false;
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}

bool RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        // NOTE: validation needed for invalid handles
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
            subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
        } else {
            subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
                ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
                : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
#endif
    }
    return valid;
}

bool RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        subpassResourceStates.layouts[attachmentIndex] =
            (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
                ? CORE_IMAGE_LAYOUT_GENERAL
                : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.depthAttachmentCount == 1) {
        const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |=
            (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |=
            (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
        const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessFragmentShadingRateAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.fragmentShadingRateAttachmentCount == 1) {
#if (RENDER_VULKAN_FSR_ENABLED == 1)
        const uint32_t attachmentIndex = subpassRef.fragmentShadingRateAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL;
#else
        PLUGIN_LOG_ONCE_I("vk_fsr_disabled_flag",
            "RENDER_VALIDATION: Fragment shading rate disabled and all related attachments ignored.");
#endif
    }
    return valid;
}

void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
{
    auto* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
    if (data) {
        data->subpassContents = subpassContents;
        data->renderCommandListIndex = 0; // will be updated in the render graph

        renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
    }
}

void RenderCommandList::EndRenderPass()
{
    if (!stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_EndRenderPass_",
            "RenderCommandList: a render pass must be begun before calling EndRenderPass");
#endif
        stateData_.validCommandList = false;
        return;
    }

    if (hasMultiRpCommandListSubpasses_ && (multiRpCommandListData_.rpBeginCmdIndex != INVALID_CL_IDX)) {
        multiRpCommandListData_.rpEndCmdIndex = static_cast<uint32_t>(renderCommands_.size());
    }

    auto* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
    if (data) {
        // will be updated in the render graph if this is a multi render command list render pass
        data->endType = RenderPassEndType::END_RENDER_PASS;
        data->subpassStartIndex = stateData_.renderPassStartIndex;
        data->subpassCount = stateData_.renderPassSubpassCount;

        renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
    }

    stateData_.renderPassHasBegun = false;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = 0;
}

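/**
 * Disables automatic barrier points until EndDisableAutomaticBarrierPoints() is called.
 * A barrier point for already pending barriers is still added first, so resource
 * transitions recorded before this call are not lost.
 *
 * Illustrative usage (a sketch only; cmdList is any IRenderCommandList reference):
 *
 *   cmdList.BeginDisableAutomaticBarrierPoints();
 *   // record work for which the caller handles synchronization explicitly,
 *   // e.g. with CustomMemoryBarrier() followed by AddCustomBarrierPoint()
 *   cmdList.EndDisableAutomaticBarrierPoints();
 */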
void RenderCommandList::BeginDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);

    // barrier point for pending barriers
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = false;
}

void RenderCommandList::EndDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);

    stateData_.automaticBarriersEnabled = true;
}

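// Adds an explicit barrier point that consumes the custom barriers recorded so far.
// Automatic barriers are force-enabled only for the duration of this call, because
// AddBarrierPoint() checks the automaticBarriersEnabled flag.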
void RenderCommandList::AddCustomBarrierPoint()
{
    const bool barrierState = stateData_.automaticBarriersEnabled;
    stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = barrierState;
}

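/**
 * Records a global memory barrier that is not tied to a single resource; it takes effect
 * at the next barrier point. Must be called outside of a render pass.
 *
 * Illustrative usage (a sketch; the flag values below are assumptions mirroring the
 * engine's Vulkan-style enums, not taken from this file):
 *
 *   const GeneralBarrier src { CORE_ACCESS_SHADER_WRITE_BIT, CORE_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
 *   const GeneralBarrier dst { CORE_ACCESS_SHADER_READ_BIT, CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT };
 *   cmdList.CustomMemoryBarrier(src, dst);
 *   cmdList.AddCustomBarrierPoint();
 */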
void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
#endif

    CommandBarrier cb {
        RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
        {
            source.accessFlags,
            source.pipelineStageFlags,
        },
        {},
        {
            destination.accessFlags,
            destination.pipelineStageFlags,
        },
        {},
    };

    customBarriers_.push_back(move(cb));

    stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
}

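// Records a buffer barrier for the given byte range. Zero-sized barriers and non-buffer
// handles are rejected; the offset and size are stored in both the source and the
// destination state of the resulting CommandBarrier.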
void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
    const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (byteSize == 0) {
        PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
            "RENDER_VALIDATION: do not create zero-sized custom buffer barriers");
    }
    if (handleType != RenderHandleType::GPU_BUFFER) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
    }
#endif

    if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalByteOffset = byteOffset;
        src.optionalByteSize = byteSize;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalByteOffset = byteOffset;
        dst.optionalByteSize = byteSize;

        CommandBarrier cb {
            handle,
            src,
            {},
            dst,
            {},
        };

        customBarriers_.push_back(move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

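// Convenience overload without an explicit source state: CORE_IMAGE_LAYOUT_MAX_ENUM is
// used as a sentinel so that the correct source state is resolved from tracked state.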
void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
    const ImageSubresourceRange& imageSubresourceRange)
{
    // CORE_IMAGE_LAYOUT_MAX_ENUM is a sentinel: the correct source state is fetched from tracked state
    ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
    CustomImageBarrier(handle, source, destination, imageSubresourceRange);
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
    const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (handleType != RenderHandleType::GPU_IMAGE) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
    }
    ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
#endif

    if (handleType == RenderHandleType::GPU_IMAGE) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalImageLayout = source.imageLayout;
        src.optionalImageSubresourceRange = imageSubresourceRange;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalImageLayout = destination.imageLayout;
        dst.optionalImageSubresourceRange = imageSubresourceRange;

        CommandBarrier cb {
            handle,
            src,
            {},
            dst,
            {},
        };

        customBarriers_.push_back(cb);

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

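// The copy commands below share a pattern: if either resource is dynamic (i.e. its state
// is tracked frame-to-frame), a barrier point is added before the copy so the render
// graph can emit the required transitions.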
void RenderCommandList::CopyBufferToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferCopy = bufferCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
    }
}

void RenderCommandList::CopyBufferToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
    }
}

void RenderCommandList::CopyImageToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
    }
}

void RenderCommandList::CopyImageToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->imageCopy = imageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
    }
}

void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
    const ImageBlit& imageBlit, const Filter filter)
{
    if (!stateData_.renderPassHasBegun) {
        if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
            if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
                RenderHandleUtil::IsDynamicResource(destinationHandle)) {
                AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
            }

            auto* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
            if (data) {
                data->srcHandle = sourceHandle;
                data->dstHandle = destinationHandle;
                data->imageBlit = imageBlit;
                data->filter = filter;
                // NOTE: desired layouts (the barrier point needs to respect these)
                data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
                data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

                renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
            }
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of a render pass");
    }
}

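// Updates CPU-side descriptor set data for the given handles. Newly updated non-global
// sets are queued to descriptorSetHandlesForUpdates_ for later GPU-side processing;
// global descriptor sets are not queued here. Invalid binding combinations are reported
// once per node when validation is enabled.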
void RenderCommandList::UpdateDescriptorSets(const BASE_NS::array_view<const RenderHandle> handles,
    const BASE_NS::array_view<const DescriptorSetLayoutBindingResources> bindingResources)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (handles.size() != bindingResources.size()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: UpdateDescriptorSets handles and bindingResources sizes do not match");
    }
#endif
    const uint32_t count = static_cast<uint32_t>(Math::min(handles.size(), bindingResources.size()));
    if (count > 0U) {
        for (uint32_t idx = 0; idx < count; ++idx) {
            const auto& handleRef = handles[idx];
            const auto& bindingResRef = bindingResources[idx];
#if (RENDER_VALIDATION_ENABLED == 1)
            ValidateDescriptorTypeBinding(nodeName_, gpuResourceMgr_, bindingResRef);
#endif
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handleRef);
            const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(handleRef);
#if (RENDER_VALIDATION_ENABLED == 1)
            if (bindingResRef.bindingMask != bindingResRef.descriptorSetBindingMask) {
                PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSets_bm_",
                    "RENDER_VALIDATION: invalid bindings in descriptor set update (node:%s)", nodeName_.c_str());
            }
            if (handleType != RenderHandleType::DESCRIPTOR_SET) {
                PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
            }
#endif
            if (handleType == RenderHandleType::DESCRIPTOR_SET) {
                const DescriptorSetUpdateInfoFlags updateFlags =
                    nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handleRef, bindingResRef, gpuQueue_);
                if ((updateFlags == DescriptorSetUpdateInfoFlagBits::DESCRIPTOR_SET_UPDATE_INFO_NEW_BIT) &&
                    ((additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) == 0U)) {
                    descriptorSetHandlesForUpdates_.push_back(handleRef);
                } else if (updateFlags & DescriptorSetUpdateInfoFlagBits::DESCRIPTOR_SET_UPDATE_INFO_INVALID_BIT) {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSet_invalid_",
                        "RenderCommandList: invalid descriptor set bindings with update (node:%s)", nodeName_.c_str());
#endif
                }
            }
        }
    }
}

void RenderCommandList::UpdateDescriptorSet(
    const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
{
    UpdateDescriptorSets({ &handle, 1U }, { &bindingResources, 1U });
}

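/**
 * Binds descriptor sets starting from firstSet. Consecutive BindDescriptorSets() calls
 * are merged into the previous bind command when the set ranges are contiguous, which
 * keeps the command stream compact.
 *
 * Illustrative usage (a sketch; the handles and offset are placeholders, not from this file):
 *
 *   BindDescriptorSetData sets[2U];
 *   sets[0U].handle = globalSetHandle;                 // set 0
 *   sets[1U].handle = materialSetHandle;               // set 1
 *   sets[1U].dynamicOffsets = { &dynamicOffset, 1U };  // optional dynamic offset(s)
 *   cmdList.BindDescriptorSets(0U, { sets, 2U });
 */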
void RenderCommandList::BindDescriptorSets(
    const uint32_t firstSet, const BASE_NS::array_view<const BindDescriptorSetData> descriptorSetData)
{
    if (descriptorSetData.empty()) {
        return;
    }
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(descriptorSetData.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E(
            "RenderCommandList::BindDescriptorSets: firstSet + descriptorSetData.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (descriptorSetData.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
    for (const auto& ref : descriptorSetData) {
        if (ref.dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
        }
    }
#endif

    RenderCommandBindDescriptorSets* data = nullptr;
    uint32_t descriptorSetCounterForBarriers = 0;
    uint32_t currSet = firstSet;

    // combine consecutive descriptor set bindings into the previous bind command when possible
    if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BIND_DESCRIPTOR_SETS)) {
        if (auto* prevCmd = static_cast<RenderCommandBindDescriptorSets*>(renderCommands_.back().rc); prevCmd) {
            if ((prevCmd->firstSet + prevCmd->setCount) == firstSet) {
                // append the new sets to the previous command
                prevCmd->setCount += static_cast<uint32_t>(descriptorSetData.size());
                prevCmd->setCount = Math::min(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT, prevCmd->setCount);
                data = prevCmd;
            }
        }
    }

    // new allocation
    bool newAllocation = false;
    if (!data) {
        if (data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
            newAllocation = true;

            *data = {}; // default

            data->psoHandle = stateData_.currentPsoHandle;
            data->firstSet = firstSet;
            data->setCount = Math::min(
                PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT, static_cast<uint32_t>(descriptorSetData.size()));
        }
    }

    if (data) {
        for (const auto& ref : descriptorSetData) {
            if (currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
                const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(ref.handle);
                // track whether this particular set is a global descriptor set
                bool globalDescSet = false;
                if ((additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) != 0U) {
                    hasGlobalDescriptorSetBindings_ = true;
                    globalDescSet = true;
                }
                // allocate dynamic offsets for this set
                if (!ref.dynamicOffsets.empty()) {
                    const auto dynCount = static_cast<uint32_t>(ref.dynamicOffsets.size());
                    if (auto* doData = AllocateRenderData<uint32_t>(allocator_, dynCount); doData) {
                        auto& dynRef = data->descriptorSetDynamicOffsets[currSet];
                        dynRef.dynamicOffsets = doData;
                        dynRef.dynamicOffsetCount = dynCount;
                        CloneData(dynRef.dynamicOffsets, dynCount * sizeof(uint32_t), ref.dynamicOffsets.data(),
                            ref.dynamicOffsets.size_bytes());
                    }
                }

                data->descriptorSetHandles[currSet] = ref.handle;

                // NOTE: for global descriptor sets we do not yet know whether they have dynamic resources;
                // the set might be updated from a random render node task / thread
                const bool hasDynamicBarrierResources =
                    globalDescSet || nodeContextDescriptorSetManager_.HasDynamicBarrierResources(ref.handle);
                if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(ref.handle);
                    descriptorSetCounterForBarriers++;
                }
                stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
                stateData_.currentBoundSets[currSet].descriptorSetHandle = ref.handle;
                stateData_.currentBoundSetsMask |= (1u << currSet);
                ++currSet;
            }
        }

        if (newAllocation) {
            renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });
        }
        // if currentBarrierPoint is null, there have been invalid bindings earlier
        if (stateData_.renderPassHasBegun && stateData_.currentBarrierPoint) {
            // add possible barriers before the render pass
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const BindDescriptorSetData& descriptorSetData)
{
    BindDescriptorSets(set, { &descriptorSetData, 1U });
}

void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
{
    BindDescriptorSetData bdsd[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
    const uint32_t count =
        Math::min(static_cast<uint32_t>(handles.size()), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
    for (uint32_t idx = 0U; idx < count; ++idx) {
        bdsd[idx].handle = handles[idx];
    }
    BindDescriptorSets(firstSet, { bdsd, count });
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
{
    BindDescriptorSetData bdsd = { handle, {} };
    BindDescriptorSets(set, { &bdsd, 1U });
}

void RenderCommandList::BindDescriptorSet(
    const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
{
    BindDescriptorSetData bdsd = { handle, dynamicOffsets };
    BindDescriptorSets(set, { &bdsd, 1U });
}

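// Records an acceleration structure build. The geometry arrays are cloned into the
// command list allocator, so the caller's views do not need to outlive this call.
// With RENDER_VULKAN_RT_ENABLED == 0 the call is a no-op.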
void RenderCommandList::BuildAccelerationStructures(const AccelerationStructureBuildGeometryData& geometry,
    const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,
    const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,
    const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)
{
    if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        RenderCommandBuildAccelerationStructure* data =
            AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
        if (!data) {
            return; // early out
        }
        data->type = geometry.info.type;
        data->flags = geometry.info.flags;
        data->mode = geometry.info.mode;
        data->srcAccelerationStructure = geometry.srcAccelerationStructure;
        data->dstAccelerationStructure = geometry.dstAccelerationStructure;
        data->scratchBuffer = geometry.scratchBuffer.handle;
        data->scratchOffset = geometry.scratchBuffer.offset;

        if (!triangles.empty()) {
            auto* trianglesData = static_cast<AccelerationStructureGeometryTrianglesData*>(
                AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryTrianglesData>(),
                    sizeof(AccelerationStructureGeometryTrianglesData) * triangles.size()));
            if (trianglesData) { // guard against allocation failure before writing through the view
                data->trianglesData = trianglesData;
                data->trianglesView = { data->trianglesData, triangles.size() };
                for (size_t idx = 0; idx < triangles.size(); ++idx) {
                    data->trianglesView[idx] = triangles[idx];
                }
            }
        }
        if (!aabbs.empty()) {
            auto* aabbsData = static_cast<AccelerationStructureGeometryAabbsData*>(
                AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryAabbsData>(),
                    sizeof(AccelerationStructureGeometryAabbsData) * aabbs.size()));
            if (aabbsData) { // guard against allocation failure
                data->aabbsData = aabbsData;
                data->aabbsView = { data->aabbsData, aabbs.size() };
                for (size_t idx = 0; idx < aabbs.size(); ++idx) {
                    data->aabbsView[idx] = aabbs[idx];
                }
            }
        }
        if (!instances.empty()) {
            auto* instancesData = static_cast<AccelerationStructureGeometryInstancesData*>(
                AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryInstancesData>(),
                    sizeof(AccelerationStructureGeometryInstancesData) * instances.size()));
            if (instancesData) { // guard against allocation failure
                data->instancesData = instancesData;
                data->instancesView = { data->instancesData, instances.size() };
                for (size_t idx = 0; idx < instances.size(); ++idx) {
                    data->instancesView[idx] = instances[idx];
                }
            }
        }
        renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
#endif
    }
}

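// Clears a color image with the given subresource ranges. The image must have been
// created with CORE_IMAGE_USAGE_TRANSFER_DST_BIT; a barrier point is added so the image
// can be transitioned to TRANSFER_DST_OPTIMAL before the clear.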
void RenderCommandList::ClearColorImage(
    const RenderHandle handle, const ClearColorValue color, const array_view<const ImageSubresourceRange> ranges)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    {
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            PLUGIN_LOG_W("RENDER_VALIDATION: invalid image handle given to ClearColorImage");
        }
        if (ranges.empty()) {
            PLUGIN_LOG_W("RENDER_VALIDATION: invalid (empty) ranges given to ClearColorImage");
        }
        {
            const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(handle);
            if ((desc.usageFlags & CORE_IMAGE_USAGE_TRANSFER_DST_BIT) == 0) {
                PLUGIN_LOG_E("RENDER_VALIDATION: image missing usage flag TRANSFER_DST for ClearColorImage command");
            }
        }
    }
#endif
    if (RenderHandleUtil::IsGpuImage(handle) && (!ranges.empty())) {
        AddBarrierPoint(RenderCommandType::CLEAR_COLOR_IMAGE);

        auto* data = AllocateRenderCommand<RenderCommandClearColorImage>(allocator_);
        if (data) {
            data->handle = handle;
            data->imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            data->color = color;
            data->ranges = { AllocateRenderData<ImageSubresourceRange>(
                                 allocator_, static_cast<uint32_t>(ranges.size())),
                ranges.size() };
            if (!data->ranges.data()) {
                return;
            }
            CloneData(data->ranges.data(), data->ranges.size_bytes(), ranges.data(), ranges.size_bytes());

            renderCommands_.push_back({ RenderCommandType::CLEAR_COLOR_IMAGE, data });
        }
    }
}

void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateViewport(nodeName_, viewportDesc);
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
    if (data) {
        data->viewportDesc = viewportDesc;
        data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
        data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
    }
}

void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateScissor(nodeName_, scissorDesc);
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
    if (data) {
        data->scissorDesc = scissorDesc;
        data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
        data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
    }
}

void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
    if (data) {
        data->lineWidth = lineWidth;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBias(
    const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
    if (data) {
        data->depthBiasConstantFactor = depthBiasConstantFactor;
        data->depthBiasClamp = depthBiasClamp;
        data->depthBiasSlopeFactor = depthBiasSlopeFactor;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
    }
}

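// The four blend constants correspond to the RGBA components of the blend constant color;
// additional values are ignored (and reported when validation is enabled).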
void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
{
    constexpr uint32_t MAX_BLEND_CONSTANT_COUNT = 4;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (blendConstants.size() > MAX_BLEND_CONSTANT_COUNT) {
        PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
            MAX_BLEND_CONSTANT_COUNT);
    }
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
    if (data) {
        *data = {};
        const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), MAX_BLEND_CONSTANT_COUNT);
        for (uint32_t idx = 0; idx < bcCount; ++idx) {
            data->blendConstants[idx] = blendConstants[idx];
        }
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
    if (data) {
        data->minDepthBounds = minDepthBounds;
        data->maxDepthBounds = maxDepthBounds;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
    }
}

void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::COMPARE_MASK;
        data->faceMask = faceMask;
        data->mask = compareMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::WRITE_MASK;
        data->faceMask = faceMask;
        data->mask = writeMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::REFERENCE;
        data->faceMask = faceMask;
        data->mask = reference;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

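// Sets the per-draw fragment shading rate. Valid fragment sizes are 1, 2, and 4 texels
// per dimension; the mapping below rounds inputs 0..4 to { 1, 1, 2, 2, 4 } and clamps
// larger values to the maximum.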
void RenderCommandList::SetDynamicStateFragmentShadingRate(
    const Size2D& fragmentSize, const FragmentShadingRateCombinerOps& combinerOps)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateFragmentShadingRate>(allocator_);
    if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateFragmentShadingRate(fragmentSize);
#endif
        // map input sizes 0..4 to valid fragment sizes and clamp larger values
        constexpr uint32_t maxValue { 4u };
        constexpr uint32_t valueMapper[maxValue + 1u] = { 1u, 1u, 2u, 2u, 4u };
        Size2D fs = fragmentSize;
        fs.width = (fs.width <= maxValue) ? valueMapper[fs.width] : maxValue;
        fs.height = (fs.height <= maxValue) ? valueMapper[fs.height] : maxValue;

        data->fragmentSize = fs;
        data->combinerOps = combinerOps;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE, data });
    }
}

void RenderCommandList::SetExecuteBackendFramePosition()
{
    if (!stateData_.executeBackendFrameSet) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        auto* data = AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
            stateData_.executeBackendFrameSet = true;
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: there can be only one SetExecuteBackendFramePosition() call per frame");
    }
}

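/**
 * Begin/End debug markers must be paired; the stack counter guards against unmatched
 * EndDebugMarker() calls. Markers compile away unless RENDER_DEBUG_MARKERS_ENABLED == 1.
 *
 * Illustrative usage (a sketch; the marker name is a placeholder):
 *
 *   cmdList.BeginDebugMarker("ShadowPass", { 1.0f, 0.0f, 0.0f, 1.0f });
 *   // ... render commands ...
 *   cmdList.EndDebugMarker();
 */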
void RenderCommandList::BeginDebugMarker(const BASE_NS::string_view name, const BASE_NS::Math::Vec4 color)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (!name.empty()) {
        RenderCommandBeginDebugMarker* data = AllocateRenderCommand<RenderCommandBeginDebugMarker>(allocator_);
        if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
            if (name.size() > RenderCommandBeginDebugMarker::SIZE_OF_NAME) {
                PLUGIN_LOG_W("RENDER_VALIDATION: debug marker name is longer than the maximum (%u)",
                    RenderCommandBeginDebugMarker::SIZE_OF_NAME);
            }
#endif
            data->name = name;
            data->color = { color };
            renderCommands_.push_back({ RenderCommandType::BEGIN_DEBUG_MARKER, data });
            debugMarkerStack_.stackCounter++;
        }
    }
#endif
}

void RenderCommandList::BeginDebugMarker(const BASE_NS::string_view name)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    BeginDebugMarker(name, { 1.0f, 1.0f, 1.0f, 1.0f });
#endif
}

void RenderCommandList::EndDebugMarker()
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (debugMarkerStack_.stackCounter > 0U) {
        RenderCommandEndDebugMarker* data = AllocateRenderCommand<RenderCommandEndDebugMarker>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::END_DEBUG_MARKER, data });
            debugMarkerStack_.stackCounter--;
        }
    }
#endif
}

void RenderCommandList::ValidateRenderPass(const RenderPassDesc& renderPassDesc)
{
    if (stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_hasbegun_",
            "RenderCommandList: a render pass is active; it needs to end before starting a new one (node: %s)",
            nodeName_.c_str());
#endif
        stateData_.validCommandList = false;
    }
    // validate render pass attachments
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        if (!RenderHandleUtil::IsValid(renderPassDesc.attachmentHandles[idx])) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_attachments_",
                "RenderCommandList: invalid render pass attachment handle in index: %u (node:%s)", idx,
                nodeName_.c_str());
#endif
            stateData_.validCommandList = false;
        }
    }
}

void RenderCommandList::ValidatePipeline()
{
    if (!stateData_.validPso) {
        stateData_.validCommandList = false;
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidatePipeline_", "RenderCommandList: PSO not bound.");
#endif
    }
}

void RenderCommandList::ValidatePipelineLayout()
{
    if (stateData_.checkBindPipelineLayout) {
        stateData_.checkBindPipelineLayout = false;
        // fast check without validation
        const uint32_t pipelineLayoutSetsMask =
            RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
        if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E("RenderCommandList::ValidatePipelineLayout",
                "RenderCommandList: not all needed descriptor sets bound");
#endif
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
        const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
                                       ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
                                       : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
        uint32_t plDescriptorSetCount = 0U;
        uint32_t bindCount = 0U;
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
            if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
                bindCount++;
            }
            if (pl.descriptorSetLayouts[idx].set != PipelineLayoutConstants::INVALID_INDEX) {
                plDescriptorSetCount++;
            }
        }
        if (bindCount < plDescriptorSetCount) {
            PLUGIN_LOG_ONCE_E(nodeName_ + "not_all_pl_bound",
                "RENDER_VALIDATION: not all descriptor sets required by the pipeline layout are bound");
        }
#endif
    }
}

const CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid) const
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid)
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

void RenderCommandList::Ref() {}

void RenderCommandList::Unref() {}
RENDER_END_NAMESPACE()