/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_command_list.h"

#include <cinttypes>
#include <cstdint>

#include <base/containers/array_view.h>
#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_command_list.h>
#include <render/render_data_structures.h>

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "util/linear_allocator.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
PLUGIN_STATIC_ASSERT(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT == 4);
PLUGIN_STATIC_ASSERT(PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT == 8u);
namespace {
#if (RENDER_VALIDATION_ENABLED == 1)
void ValidateImageUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const ImageUsageFlags imageUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetImageDescriptor(handle).usageFlags & imageUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateImageUsageFlags_",
            "RENDER_VALIDATION: gpu image (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateBufferUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const BufferUsageFlags bufferUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetBufferDescriptor(handle).usageFlags & bufferUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateBufferUsageFlags_",
            "RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateDescriptorTypeBinding(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const DescriptorSetLayoutBindingResources& bindingRes)
{
    for (const auto& ref : bindingRes.buffers) {
        if (!RenderHandleUtil::IsGpuBuffer(ref.resource.handle)) {
            PLUGIN_LOG_ONCE_E(nodeName + "_invalid_gpu_buffer_", "RENDER_VALIDATION: invalid GPU buffer");
        }
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
            // no buffer usage flags to validate for acceleration structures
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.images) {
        if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
            (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT,
                "CORE_IMAGE_USAGE_SAMPLED_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT,
                "CORE_IMAGE_USAGE_STORAGE_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.samplers) {
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
            // samplers have no usage flags to validate
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
        }
    }
}

void ValidateRenderPassAttachment(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
    const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[0]);
    const uint32_t baseWidth = baseDesc.width;
    const uint32_t baseHeight = baseDesc.height;
    // NOTE: we do not check fragment shading rate attachment size
    for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDesc.attachmentCount; ++attachmentIdx) {
        const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[attachmentIdx]);
        if (desc.width != baseWidth || desc.height != baseHeight) {
            for (const auto& subpassRef : subpassDescs) {
                // returns true if the attachment is not referenced by the given index list
                auto CheckAttachments = [](const auto& indices, const uint32_t count, const uint32_t attachmentIndex) {
                    for (uint32_t idx = 0; idx < count; ++idx) {
                        if (indices[idx] == attachmentIndex) {
                            return false;
                        }
                    }
                    return true;
                };
                bool valid = true;
                valid &=
                    CheckAttachments(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount, attachmentIdx);
                valid &=
                    CheckAttachments(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount, attachmentIdx);
                valid &= CheckAttachments(
                    subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount, attachmentIdx);
                if ((subpassRef.depthAttachmentIndex == attachmentIdx) ||
                    (subpassRef.depthResolveAttachmentIndex == attachmentIdx)) {
                    valid = false;
                }
                if (!valid) {
                    if (RenderHandleUtil::IsSwapchain(renderPassDesc.attachmentHandles[attachmentIdx]) &&
                        RenderHandleUtil::IsDepthImage(renderPassDesc.attachmentHandles[0])) {
                        PLUGIN_LOG_ONCE_W(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: Depth and swapchain input mismatch: baseWidth:%u baseHeight:%u "
                            "currWidth:%u currHeight:%u",
                            baseWidth, baseHeight, desc.width, desc.height);
                    } else {
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: render pass attachment size does not match with attachment index: %u",
                            attachmentIdx);
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
                            baseHeight, desc.width, desc.height);
                    }
                }
            }
        }
    }
    if ((renderPassDesc.renderArea.extentWidth == 0) || (renderPassDesc.renderArea.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaExtent_",
            "RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u) (node: %s)",
            renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight, nodeName.data());
    }
    if ((renderPassDesc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
        (renderPassDesc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaOffset_",
            "RENDER_VALIDATION: render area offset cannot be outside the render target (offsetX: %i, offsetY: %i) "
            "(baseWidth: %u, baseHeight: %u) (node: %s)",
            renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY, baseWidth, baseHeight,
            nodeName.data());
    }
}

void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageSubresourceRange& imageSubresourceRange)
{
    const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
    if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseMipLevel: %u, is greater than or equal to "
                     "mipCount: %u",
            imageSubresourceRange.baseMipLevel, desc.mipCount);
    }
    if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseArrayLayer: %u, is greater than or equal to "
                     "layerCount: %u",
            imageSubresourceRange.baseArrayLayer, desc.layerCount);
    }
}

void ValidateViewport(const string_view nodeName, const ViewportDesc& vd)
{
    if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateViewport_",
            "RENDER_VALIDATION: viewport width (%f) and height (%f) must be one or larger (node: %s)", vd.width,
            vd.height, nodeName.data());
    }
}

void ValidateScissor(const string_view nodeName, const ScissorDesc& sd)
{
    if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateScissor_",
            "RENDER_VALIDATION: scissor extentWidth (%u) and scissor extentHeight (%u) cannot be zero (node: %s)",
            sd.extentWidth, sd.extentHeight, nodeName.data());
    }
}

void ValidateFragmentShadingRate(const Size2D& size)
{
    bool valid = true;
    if ((size.width == 0) || (size.height == 0)) {
        valid = false;
    } else if ((size.width == 3u) || (size.height == 3u)) {
        valid = false;
    } else if ((size.width > 4u) || (size.height > 4u)) {
        valid = false;
    }
    if (!valid) {
        PLUGIN_LOG_W("RENDER_VALIDATION: fragmentSize must be less than or equal to 4 and the value must be a "
                     "power of two (width = %u, height = %u)",
            size.width, size.height);
    }
}
#endif // RENDER_VALIDATION_ENABLED

constexpr uint32_t INVALID_CL_IDX { ~0u };

constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barriers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };

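// Rounds byteSize up to the next multiple of alignment. The mask trick below
// requires alignment to be a power of two (e.g. 100 aligned to 64 -> 128);
// BYTE_SIZE_ALIGNMENT above satisfies this.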
size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

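// Allocates from the newest block of the list's linear allocator and appends a
// new block when the current one is exhausted. Block sizes grow geometrically
// (a new block is at least twice the size of the previous one), so a frame
// that records many commands quickly settles into a small number of blocks.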
void* AllocateRenderData(
    RenderCommandList::LinearAllocatorStruct& allocator, const size_t alignment, const size_t byteSz)
{
    PLUGIN_ASSERT(byteSz > 0);
    void* rc = nullptr;
    if (!allocator.allocators.empty()) {
        const size_t currentIndex = allocator.allocators.size() - 1;
        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
    }

    if (rc == nullptr) { // current allocator is out of memory
        size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
        const size_t currentIndex = allocator.allocators.size();
        if (currentIndex > 0) {
            allocatorByteSize =
                Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
        }
        allocator.allocators.push_back(make_unique<LinearAllocator>(allocatorByteSize));

        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
        if (rc == nullptr) {
            PLUGIN_LOG_E("RenderCommandList: render command list allocation: out of memory");
            PLUGIN_ASSERT(false);
        }
    }
    return rc;
}

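// Typed helpers over AllocateRenderData(). The returned memory is
// uninitialized: callers either zero-initialize (*data = {}) or assign every
// member before pushing the command to renderCommands_.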
template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T) * count));
}

template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T)));
}
} // namespace

RenderCommandList::RenderCommandList(const BASE_NS::string_view nodeName,
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr, const GpuResourceManager& gpuResourceMgr,
    const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue, const bool enableMultiQueue)
    : IRenderCommandList(), nodeName_(nodeName),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}

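// Called once per frame before recording: collapses the allocator blocks of
// the previous frame into a single block (sized with roughly 1/8 extra
// headroom, see FRAME_RESERVE_EXTRA_DIVIDE), resets the per-list state, and
// clears the command vectors while retaining their capacity.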
void RenderCommandList::BeginFrame()
{
    if (allocator_.allocators.size() == 1) { // size is good for this frame
        allocator_.allocators[0]->Reset();
    } else if (allocator_.allocators.size() > 1) {
        size_t fullByteSize = 0;
        size_t alignment = 0;
        for (auto& ref : allocator_.allocators) {
            fullByteSize += ref->GetCurrentByteSize();
            alignment = Math::max(alignment, (size_t)ref->GetAlignment());
            ref.reset();
        }
        allocator_.allocators.clear();

        // add some room for current frame allocation for new render commands
        const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
        fullByteSize += extraBytes;

        // create new single allocation for combined previous size and some extra bytes
        const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
        allocator_.allocators.push_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
    }

    ResetStateData();

    const auto clearAndReserve = [](auto& vec) {
        const size_t count = vec.size();
        vec.clear();
        vec.reserve(count);
    };

    clearAndReserve(renderCommands_);
    clearAndReserve(customBarriers_);
    clearAndReserve(rpVertexInputBufferBarriers_);
    clearAndReserve(rpIndirectBufferBarriers_);
    clearAndReserve(descriptorSetHandlesForBarriers_);
    clearAndReserve(descriptorSetHandlesForUpdates_);
    clearAndReserve(backendCommands_);
    clearAndReserve(processBackendCommands_);

    validReleaseAcquire_ = false;
    hasMultiRpCommandListSubpasses_ = false;
    multiRpCommandListData_ = {};
    // NOTE: we cannot reset hadGlobalDescriptorSetBindings_ to false
    // this flag instructs the backend tasks to wait for the global descriptor set updates
    // this could be further optimized to be reset after buffering count frames of no use of global descriptor sets

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    debugMarkerStack_ = {};
#endif
}

void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
{
    if (enableMultiQueue_) {
        validReleaseAcquire_ = true;
    }
}

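// With multi-queue enabled, every command list is bracketed by two automatic
// barrier points: a GPU queue transfer acquire before the render node executes
// and a release after it (this pair is INITIAL_MULTI_QUEUE_BARRIER_COUNT).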
void RenderCommandList::BeforeRenderNodeExecuteFrame()
{
    // add possible barrier point for gpu queue transfer acquire
    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
    }
}

void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    for (uint32_t idx = debugMarkerStack_.stackCounter; idx > 0U; --idx) {
        EndDebugMarker();
    }
#endif
}

array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
{
    if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_GetRenderCommands_",
            "RenderCommandList: invalid state data in render command list (node: %s)", nodeName_.c_str());
#endif
        return {};
    } else {
        return { renderCommands_.data(), renderCommands_.size() };
    }
}

bool RenderCommandList::HasValidRenderCommands() const
{
    uint32_t renderCommandCount = GetRenderCommandCount();
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    renderCommandCount = static_cast<uint32_t>(
        Math::max(0, static_cast<int32_t>(renderCommandCount) - static_cast<int32_t>(debugMarkerStack_.commandCount)));
#endif
    bool valid = false;
    if (enableMultiQueue_) {
        if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
            // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
            valid = validReleaseAcquire_;
        } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
            valid = true;
        }
    } else {
        valid = (renderCommandCount > 0);
    }
    // add processing for render command list
    if (!processBackendCommands_.empty()) {
        valid = true;
    }
    valid = valid && stateData_.validCommandList;

    return valid;
}

uint32_t RenderCommandList::GetRenderCommandCount() const
{
    return (uint32_t)renderCommands_.size();
}

GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}

bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRpCommandListSubpasses_;
}

MultiRenderPassCommandListData RenderCommandList::GetMultiRenderCommandListData() const
{
    return multiRpCommandListData_;
}

bool RenderCommandList::HasGlobalDescriptorSetBindings() const
{
    return hadGlobalDescriptorSetBindings_;
}

array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
{
    return { customBarriers_.data(), customBarriers_.size() };
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassVertexInputBufferBarriers() const
{
    return { rpVertexInputBufferBarriers_.data(), rpVertexInputBufferBarriers_.size() };
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassIndirectBufferBarriers() const
{
    return { rpIndirectBufferBarriers_.data(), rpIndirectBufferBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
{
    return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetUpdateDescriptorSetHandles() const
{
    return { descriptorSetHandlesForUpdates_.data(), descriptorSetHandlesForUpdates_.size() };
}

void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    auto* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (!data) {
        return; // early out
    }
    *data = {}; // zero initialize

    data->renderCommandType = renderCommandType;
    data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

    // update new index (within render pass there might not be any dirty descriptor sets at this stage)
    const auto descriptorSetBeginIndex = static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size());
    data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
    data->descriptorSetHandleCount = 0U;
    // update new index (only valid with render pass)
    data->vertexIndexBarrierIndexBegin = static_cast<uint32_t>(rpVertexInputBufferBarriers_.size());
    data->vertexIndexBarrierCount = 0U;
    // update new index (only valid with render pass)
    data->indirectBufferBarrierIndexBegin = static_cast<uint32_t>(rpIndirectBufferBarriers_.size());
    data->indirectBufferBarrierCount = 0U;

    // barriers are always needed e.g. when dynamic resource is bound for writing in multiple dispatches
    const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                      renderCommandType == RenderCommandType::DISPATCH ||
                                      renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
    if (handleDescriptorSets) {
        stateData_.dirtyDescriptorSetsForBarriers = false;
        for (auto& currentBoundSet : stateData_.currentBoundSets) {
            // only add descriptor set handles for barriers if there are dynamic barrier resources
            if (currentBoundSet.hasDynamicBarrierResources) {
                descriptorSetHandlesForBarriers_.push_back(currentBoundSet.descriptorSetHandle);
            }
        }
        data->descriptorSetHandleCount = (uint32_t)descriptorSetHandlesForBarriers_.size() - descriptorSetBeginIndex;
    }

    const bool handleCustomBarriers =
        ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
    if (handleCustomBarriers) {
        const int32_t newCount = (int32_t)customBarriers_.size() - stateData_.currentCustomBarrierIndices.prevSize;
        if (newCount > 0) {
            data->customBarrierIndexBegin = (uint32_t)stateData_.currentCustomBarrierIndices.prevSize;
            data->customBarrierCount = (uint32_t)newCount;

            stateData_.currentCustomBarrierIndices.prevSize = (int32_t)customBarriers_.size();
            stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
        }
    }

    // store current barrier point for render command list
    // * binding descriptor sets (with dynamic barrier resources)
    // * binding vertex and index buffers (with dynamic barrier resources)
    // * indirect args buffer (with dynamic barrier resources)
    // inside a render pass adds barriers directly to the RenderCommandBarrierPoint behind this pointer
    stateData_.currentBarrierPoint = data;

    renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
}

void RenderCommandList::Draw(
    const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_Draw_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw)");
    }
#endif

    if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW;
            data->vertexCount = vertexCount;
            data->instanceCount = instanceCount;
            data->firstVertex = firstVertex;
            data->firstInstance = firstInstance;
            data->indexCount = 0;
            data->firstIndex = 0;
            data->vertexOffset = 0;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
    const int32_t vertexOffset, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DrawIndexed_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw).");
    }
#endif

    if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED;
            data->vertexCount = 0;
            data->instanceCount = instanceCount;
            data->firstVertex = 0;
            data->firstInstance = firstInstance;
            data->indexCount = indexCount;
            data->firstIndex = firstIndex;
            data->vertexOffset = vertexOffset;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DI_buffer_", "RENDER_VALIDATION: DrawIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
                // four uint32 values (matches e.g. Vulkan's VkDrawIndirectCommand)
                constexpr uint32_t drawIndirectCommandSize { 4U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::DrawIndexedIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(
            nodeName_ + "_RCL_DII_buffer_", "RENDER_VALIDATION: DrawIndexedIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        auto* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
                // five uint32 values (matches e.g. Vulkan's VkDrawIndexedIndirectCommand)
                constexpr uint32_t drawIndirectCommandSize { 5U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
{
    if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
        ValidatePipeline();
        ValidatePipelineLayout();

        AddBarrierPoint(RenderCommandType::DISPATCH);

        auto* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
        if (data) {
            data->groupCountX = groupCountX;
            data->groupCountY = groupCountY;
            data->groupCountZ = groupCountZ;

            renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
        }
    }
}

void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
{
    ValidatePipeline();
    ValidatePipelineLayout();

    AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);

    auto* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
    if (data) {
        data->argsHandle = bufferHandle;
        data->offset = offset;

        renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
    }
}

void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    // NOTE: we cannot early out with the same pso handle
    // the render pass and its hashes might have changed
    // the final pso needs to be hashed with the final render pass
    // the backends try to check the re-binding of the same pipeline
    // another approach would be to check when the render pass changes and re-bind psos if needed

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_BindPipeline_",
                "RENDER_VALIDATION: RenderCommandList: bind pipeline after render pass begin.");
        }
    }
#endif

    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    auto* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}

void RenderCommandList::PushConstantData(
    const RENDER_NS::PushConstant& pushConstant, const BASE_NS::array_view<const uint8_t> data)
{
    ValidatePipeline();

    // push constant is not used/allocated if byte size is bigger than supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && (!data.empty())) {
        auto* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        // use alignment of uint32 as currently the push constants are uint32s
        // the data is allocated by shader/pipeline needs
        uint8_t* pushData =
            static_cast<uint8_t*>(AllocateRenderData(allocator_, std::alignment_of<uint32_t>(), pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            // the max amount of visible data is copied
            const size_t minData = Math::min(static_cast<size_t>(pushConstant.byteSize), data.size_bytes());
            CloneData(rc->data, pushConstant.byteSize, data.data(), minData);

            renderCommands_.push_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be less than or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}

void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    if ((pushConstant.byteSize > 0) && data) {
        PushConstantData(pushConstant, { data, pushConstant.byteSize });
    }
}

void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION: max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (vertexBuffers.empty()) {
        return; // early out
    }
    auto* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
    if (!data) {
        return; // early out
    }

    VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
    uint32_t dynamicBarrierVertexBufferCount = 0;
    const uint32_t vertexBufferCount =
        Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, (uint32_t)vertexBuffers.size());
    data->vertexBufferCount = vertexBufferCount;
    RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
    for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
        data->vertexBuffers[idx] = vertexBuffers[idx];
        const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
        if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
            (vertexBuffers[idx].byteSize > 0)) {
            // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
            dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
                PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
            previousVbHandle = currVbHandle;
        }
    }

    // add possible vertex/index buffer barriers before render pass
    if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
        PLUGIN_ASSERT(stateData_.currentBarrierPoint);
        stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
        const size_t currCount = rpVertexInputBufferBarriers_.size();
        rpVertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
        for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
            rpVertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
        }
    }

    renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
}

void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
{
    ValidatePipeline();

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
    }
#endif

    auto* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
    if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
        data->indexBuffer = indexBuffer;
        if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
            stateData_.currentBarrierPoint->vertexIndexBarrierCount++;
            rpVertexInputBufferBarriers_.push_back(
                { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
        }
        renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
    }
}

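// BeginRenderPass has two variants: the one below records a complete render
// pass with all subpasses described up front, while the overload taking
// subpassStartIdx records a single subpass of a render pass that is split
// across multiple render command lists (see HasMultiRenderCommandListSubpasses()).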
void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif
    if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_subpass_",
            "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
            renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
#endif
        stateData_.validCommandList = false;
    }
    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount == 0) {
        return;
    }
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, subpassDescs);
#endif
    AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

    if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
        // NOTE: hashed in the backend
        PLUGIN_ASSERT(renderPassDesc.subpassCount == (uint32_t)subpassDescs.size());

        data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
        data->renderPassDesc = renderPassDesc;
        data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
        data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
        data->subpassStartIndex = 0;
        // if false -> initial layout is undefined
        data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

        data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
            renderPassDesc.subpassCount };
        data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                            allocator_, renderPassDesc.subpassCount),
            renderPassDesc.subpassCount };
        if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
            return;
        }

        CloneData(data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());

        bool valid = true;
        for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
            const auto& subpassRef = subpassDescs[subpassIdx];

            RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
            subpassResourceStates = {};

            valid = valid && ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            valid = valid && ProcessFragmentShadingRateAttachments(renderPassDesc, subpassRef, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
            data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
        }
        if (!valid) {
            stateData_.validCommandList = false;
        }

        // render pass layouts will be updated by render graph
        renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (subpassStartIdx >= renderPassDesc.subpassCount) {
        PLUGIN_LOG_E("RCL:BeginRenderPass: subpassStartIdx (%u) must be smaller than renderPassDesc.subpassCount (%u)",
            subpassStartIdx, renderPassDesc.subpassCount);
        stateData_.validCommandList = false;
    }

    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = subpassStartIdx;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, { &subpassDesc, 1u });
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (hasMultiRpCommandListSubpasses_) {
            PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
            stateData_.validCommandList = false;
        } else if (renderPassDesc.subpassCount > 1) {
            hasMultiRpCommandListSubpasses_ = true;
            multiRpCommandListData_.secondaryCmdLists =
                (renderPassDesc.subpassContents == CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS);
            if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BARRIER_POINT)) {
                multiRpCommandListData_.rpBarrierCmdIndex = static_cast<uint32_t>(renderCommands_.size()) - 1u;
            }
        }
        multiRpCommandListData_.subpassCount = renderPassDesc.subpassCount;
        multiRpCommandListData_.rpBeginCmdIndex = static_cast<uint32_t>(renderCommands_.size());

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->subpassStartIndex = subpassStartIdx;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
                return;
            }

            bool valid = true;
            for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};
                data->subpasses[subpassIdx] = {};

                if (subpassIdx == subpassStartIdx) {
                    data->subpasses[subpassIdx] = subpassDesc;
                    valid = valid && ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid &&
                            ProcessFragmentShadingRateAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
                    data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
                }
            }
            if (!valid) {
                stateData_.validCommandList = false;
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}

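// The Process*Attachments helpers below record, per subpass, the access flags,
// pipeline stages, and image layouts required by each attachment; the render
// graph later consumes these RenderPassAttachmentResourceStates to create
// barriers and layout transitions.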
bool RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        // NOTE: validation needed for invalid handles
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
            subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
        } else {
            subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
                                                                 ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
                                                                 : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
#endif
    }
    return valid;
}

bool RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        subpassResourceStates.layouts[attachmentIndex] =
            (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
                ? CORE_IMAGE_LAYOUT_GENERAL
                : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.depthAttachmentCount == 1) {
        const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |=
            (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |=
            (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
        const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessFragmentShadingRateAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.fragmentShadingRateAttachmentCount == 1) {
#if (RENDER_VULKAN_FSR_ENABLED == 1)
        const uint32_t attachmentIndex = subpassRef.fragmentShadingRateAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL;
#else
        PLUGIN_LOG_ONCE_I("vk_fsr_disabled_flag",
            "RENDER_VALIDATION: Fragment shading rate disabled and all related attachments ignored.");
#endif
    }
    return valid;
}

void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
{
    auto* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
    if (data) {
        data->subpassContents = subpassContents;
        data->renderCommandListIndex = 0; // will be updated in the render graph

        renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
    }
}

void RenderCommandList::EndRenderPass()
{
    if (!stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(
            nodeName_ + "_RCL_EndRenderPass_", "RenderCommandList: render pass needs to begin before calling end");
#endif
        stateData_.validCommandList = false;
        return;
    }

    if (hasMultiRpCommandListSubpasses_ && (multiRpCommandListData_.rpBeginCmdIndex != INVALID_CL_IDX)) {
        multiRpCommandListData_.rpEndCmdIndex = static_cast<uint32_t>(renderCommands_.size());
    }

    auto* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
    if (data) {
        // will be updated in render graph if multi render command list render pass
        data->endType = RenderPassEndType::END_RENDER_PASS;
        data->subpassStartIndex = stateData_.renderPassStartIndex;
        data->subpassCount = stateData_.renderPassSubpassCount;

        renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
    }

    stateData_.renderPassHasBegun = false;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = 0;
}

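// Automatic barrier points can be temporarily disabled when the caller takes over
// synchronization with custom barriers. An illustrative usage sketch:
//     cmdList.BeginDisableAutomaticBarrierPoints();
//     cmdList.CustomMemoryBarrier(srcBarrier, dstBarrier);
//     cmdList.AddCustomBarrierPoint();
//     cmdList.EndDisableAutomaticBarrierPoints();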
void RenderCommandList::BeginDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);

    // barrier point for pending barriers
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = false;
}

void RenderCommandList::EndDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);

    stateData_.automaticBarriersEnabled = true;
}

void RenderCommandList::AddCustomBarrierPoint()
{
    const bool barrierState = stateData_.automaticBarriersEnabled;
    stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = barrierState;
}

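// Records a global memory barrier (not tied to a specific resource). The barrier is stored
// with an undefined resource handle and consumed at the next barrier point.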
void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
#endif

    CommandBarrier cb {
        RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
        {
            source.accessFlags,
            source.pipelineStageFlags,
        },
        {},
        {
            destination.accessFlags,
            destination.pipelineStageFlags,
        },
        {},
    };

    customBarriers_.push_back(move(cb));

    stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
}

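// Records a buffer barrier for a sub-range of a GPU buffer. Illustrative usage sketch;
// the access/stage flag values below are examples only, not taken from this file:
//     BufferResourceBarrier src { CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT };
//     BufferResourceBarrier dst { CORE_ACCESS_SHADER_READ_BIT, CORE_PIPELINE_STAGE_VERTEX_SHADER_BIT };
//     cmdList.CustomBufferBarrier(bufferHandle, src, dst, 0u, bufferByteSize);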
void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
    const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (byteSize == 0) {
        PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
            "RENDER_VALIDATION: do not create zero size custom buffer barriers");
    }
    if (handleType != RenderHandleType::GPU_BUFFER) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
    }
#endif

    if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalByteOffset = byteOffset;
        src.optionalByteSize = byteSize;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalByteOffset = byteOffset;
        dst.optionalByteSize = byteSize;

        CommandBarrier cb {
            handle,
            src,
            {},
            dst,
            {},
        };

        customBarriers_.push_back(move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

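// Convenience overload without an explicit source state: the source layout is set to
// CORE_IMAGE_LAYOUT_MAX_ENUM, which signals that the currently tracked image state is used.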
void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
    const ImageSubresourceRange& imageSubresourceRange)
{
    // the special layout MAX_ENUM states that the correct source state is fetched automatically
    ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
    CustomImageBarrier(handle, source, destination, imageSubresourceRange);
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
    const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (handleType != RenderHandleType::GPU_IMAGE) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
    }
    ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
#endif

    if (handleType == RenderHandleType::GPU_IMAGE) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalImageLayout = source.imageLayout;
        src.optionalImageSubresourceRange = imageSubresourceRange;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalImageLayout = destination.imageLayout;
        dst.optionalImageSubresourceRange = imageSubresourceRange;

        CommandBarrier cb {
            handle,
            src,
            {},
            dst,
            {},
        };

        customBarriers_.push_back(cb);

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

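// The copy commands below add an automatic barrier point when either resource is dynamic
// (i.e. its state is tracked frame to frame). Illustrative staging-upload sketch; the
// BufferCopy field order is assumed to mirror VkBufferCopy (srcOffset, dstOffset, size):
//     cmdList.CopyBufferToBuffer(stagingBufferHandle, deviceBufferHandle,
//         BufferCopy { 0u, 0u, byteSize });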
void RenderCommandList::CopyBufferToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferCopy = bufferCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
    }
}

void RenderCommandList::CopyBufferToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
    }
}

void RenderCommandList::CopyImageToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
    }
}

void RenderCommandList::CopyImageToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_IMAGE);
        }

        auto* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->imageCopy = imageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
    }
}

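// BlitImage performs a scaled and filtered copy, and is only valid outside render passes.
// A common use (an assumption, not shown in this file) is mip chain generation: blitting
// from mip level N to level N + 1 with a linear filter.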
void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
    const ImageBlit& imageBlit, const Filter filter)
{
    if (!stateData_.renderPassHasBegun) {
        if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
            if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
                RenderHandleUtil::IsDynamicResource(destinationHandle)) {
                AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
            }

            auto* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
            if (data) {
                data->srcHandle = sourceHandle;
                data->dstHandle = destinationHandle;
                data->imageBlit = imageBlit;
                data->filter = filter;
                // NOTE: desired layouts (barrier point needs to respect these)
                data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
                data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

                renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
            }
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of render pass");
    }
}

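// Updates CPU-side descriptor set data; the GPU-side updates happen later based on
// descriptorSetHandlesForUpdates_. Handles and binding resources are matched by index,
// so the two views should have the same size.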
void RenderCommandList::UpdateDescriptorSets(const BASE_NS::array_view<const RenderHandle> handles,
    const BASE_NS::array_view<const DescriptorSetLayoutBindingResources> bindingResources)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (handles.size() != bindingResources.size()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: UpdateDescriptorSets handles and bindingResources sizes do not match");
    }
#endif
    const uint32_t count = static_cast<uint32_t>(Math::min(handles.size(), bindingResources.size()));
    if (count > 0U) {
        for (uint32_t idx = 0; idx < count; ++idx) {
            const auto& handleRef = handles[idx];
            const auto& bindingResRef = bindingResources[idx];
#if (RENDER_VALIDATION_ENABLED == 1)
            ValidateDescriptorTypeBinding(nodeName_, gpuResourceMgr_, bindingResRef);
#endif
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handleRef);
            const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(handleRef);
#if (RENDER_VALIDATION_ENABLED == 1)
            if (bindingResRef.bindingMask != bindingResRef.descriptorSetBindingMask) {
                PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSets_bm_",
                    "RENDER_VALIDATION: invalid bindings in descriptor set update (node:%s)", nodeName_.c_str());
            }
            if (handleType != RenderHandleType::DESCRIPTOR_SET) {
                PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
            }
#endif
            if (handleType == RenderHandleType::DESCRIPTOR_SET) {
                const DescriptorSetUpdateInfoFlags updateFlags =
                    nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handleRef, bindingResRef, gpuQueue_);
                if ((updateFlags == DescriptorSetUpdateInfoFlagBits::DESCRIPTOR_SET_UPDATE_INFO_NEW_BIT) &&
                    ((additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) == 0U)) {
                    descriptorSetHandlesForUpdates_.push_back(handleRef);
                } else if (updateFlags & DescriptorSetUpdateInfoFlagBits::DESCRIPTOR_SET_UPDATE_INFO_INVALID_BIT) {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSet_invalid_",
                        "RenderCommandList: invalid descriptor set bindings with update (node:%s)", nodeName_.c_str());
#endif
                }
            }
        }
    }
}

void RenderCommandList::UpdateDescriptorSet(
    const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
{
    UpdateDescriptorSets({ &handle, 1U }, { &bindingResources, 1U });
}

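// Binds descriptor sets starting from firstSet. Consecutive calls are merged into the
// previous RenderCommandBindDescriptorSets command when the set ranges are contiguous.
// Illustrative usage sketch (handle names are placeholders):
//     cmdList.BindDescriptorSet(0u, globalSetHandle);
//     cmdList.BindDescriptorSet(1u, materialSetHandle, dynamicOffsets); // merged with set 0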
void RenderCommandList::BindDescriptorSets(
    const uint32_t firstSet, const BASE_NS::array_view<const BindDescriptorSetData> descriptorSetData)
{
    if (descriptorSetData.empty()) {
        return;
    }
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(descriptorSetData.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E(
            "RenderCommandList::BindDescriptorSets: firstSet + descriptorSetData.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (descriptorSetData.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
    for (const auto& ref : descriptorSetData) {
        if (ref.dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
        }
    }
#endif

    RenderCommandBindDescriptorSets* data = nullptr;
    uint32_t descriptorSetCounterForBarriers = 0;
    uint32_t currSet = firstSet;

    // combine descriptor set bindings
    if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BIND_DESCRIPTOR_SETS)) {
        if (auto* prevCmd = static_cast<RenderCommandBindDescriptorSets*>(renderCommands_.back().rc); prevCmd) {
            if ((prevCmd->firstSet + prevCmd->setCount) == firstSet) {
                // add sets
                prevCmd->setCount += static_cast<uint32_t>(descriptorSetData.size());
                prevCmd->setCount = Math::min(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT, prevCmd->setCount);
                data = prevCmd;
            }
        }
    }

    // new allocation
    bool newAllocation = false;
    if (!data) {
        if (data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
            newAllocation = true;

            *data = {}; // default

            data->psoHandle = stateData_.currentPsoHandle;
            data->firstSet = firstSet;
            data->setCount = Math::min(
                PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT, static_cast<uint32_t>(descriptorSetData.size()));
        }
    }

    if (data) {
        for (const auto& ref : descriptorSetData) {
            if (currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
                const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(ref.handle);
                // flag also for only this descriptor set
                bool globalDescSet = false;
                if ((additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) != 0U) {
                    hadGlobalDescriptorSetBindings_ = true;
                    globalDescSet = true;
                }
                // allocate offsets for this set
                if (!ref.dynamicOffsets.empty()) {
                    const auto dynCount = static_cast<uint32_t>(ref.dynamicOffsets.size());
                    if (auto* doData = AllocateRenderData<uint32_t>(allocator_, dynCount); doData) {
                        auto& dynRef = data->descriptorSetDynamicOffsets[currSet];
                        dynRef.dynamicOffsets = doData;
                        dynRef.dynamicOffsetCount = dynCount;
                        CloneData(dynRef.dynamicOffsets, dynCount * sizeof(uint32_t), ref.dynamicOffsets.data(),
                            ref.dynamicOffsets.size_bytes());
                    }
                }

                data->descriptorSetHandles[currSet] = ref.handle;

                // NOTE: for global descriptor sets we do not know yet if they have dynamic resources
                // The set might be updated from a random render node task / thread
                const bool hasDynamicBarrierResources =
                    globalDescSet || nodeContextDescriptorSetManager_.HasDynamicBarrierResources(ref.handle);
                if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(ref.handle);
                    descriptorSetCounterForBarriers++;
                }
                stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
                stateData_.currentBoundSets[currSet].descriptorSetHandle = ref.handle;
                stateData_.currentBoundSetsMask |= (1U << currSet);
                ++currSet;
            }
        }

        if (newAllocation) {
            renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });
        }
        // if the currentBarrierPoint is null there have been invalid bindings earlier
        if (stateData_.renderPassHasBegun && stateData_.currentBarrierPoint) {
            // add possible barriers before render pass
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const BindDescriptorSetData& descriptorSetData)
{
    BindDescriptorSets(set, { &descriptorSetData, 1U });
}

void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
{
    BindDescriptorSetData bdsd[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
    const uint32_t count =
        Math::min(static_cast<uint32_t>(handles.size()), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
    for (uint32_t idx = 0U; idx < count; ++idx) {
        bdsd[idx].handle = handles[idx];
    }
    BindDescriptorSets(firstSet, { bdsd, count });
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
{
    BindDescriptorSetData bdsd = { handle, {} };
    BindDescriptorSets(set, { &bdsd, 1U });
}

void RenderCommandList::BindDescriptorSet(
    const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
{
    BindDescriptorSetData bdsd = { handle, dynamicOffsets };
    BindDescriptorSets(set, { &bdsd, 1U });
}

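// Records acceleration structure geometry builds for ray tracing. The geometry arrays are
// deep-copied into the command list allocator, so the caller's views may be transient.
// The commands are recorded only when RENDER_VULKAN_RT_ENABLED is set.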
void RenderCommandList::BuildAccelerationStructures(const AsBuildGeometryData& geometry,
    const BASE_NS::array_view<const AsGeometryTrianglesData> triangles,
    const BASE_NS::array_view<const AsGeometryAabbsData> aabbs,
    const BASE_NS::array_view<const AsGeometryInstancesData> instances)
{
    if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        AddBarrierPoint(RenderCommandType::BUILD_ACCELERATION_STRUCTURE);

        RenderCommandBuildAccelerationStructure* data =
            AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
        if (!data) {
            return; // early out
        }
        *data = {};
        data->geometry = geometry;

        if (!triangles.empty()) {
            AsGeometryTrianglesData* trianglesData =
                static_cast<AsGeometryTrianglesData*>(AllocateRenderData(allocator_,
                    std::alignment_of<AsGeometryTrianglesData>(), sizeof(AsGeometryTrianglesData) * triangles.size()));
            if (trianglesData) { // guard against allocation failure
                data->trianglesData = trianglesData;
                data->trianglesView = { data->trianglesData, triangles.size() };
                for (size_t idx = 0; idx < triangles.size(); ++idx) {
                    data->trianglesView[idx] = triangles[idx];
                }
            }
        }
        if (!aabbs.empty()) {
            AsGeometryAabbsData* aabbsData = static_cast<AsGeometryAabbsData*>(AllocateRenderData(
                allocator_, std::alignment_of<AsGeometryAabbsData>(), sizeof(AsGeometryAabbsData) * aabbs.size()));
            if (aabbsData) { // guard against allocation failure
                data->aabbsData = aabbsData;
                data->aabbsView = { data->aabbsData, aabbs.size() };
                for (size_t idx = 0; idx < aabbs.size(); ++idx) {
                    data->aabbsView[idx] = aabbs[idx];
                }
            }
        }
        if (!instances.empty()) {
            AsGeometryInstancesData* instancesData =
                static_cast<AsGeometryInstancesData*>(AllocateRenderData(allocator_,
                    std::alignment_of<AsGeometryInstancesData>(), sizeof(AsGeometryInstancesData) * instances.size()));
            if (instancesData) { // guard against allocation failure
                data->instancesData = instancesData;
                data->instancesView = { data->instancesData, instances.size() };
                for (size_t idx = 0; idx < instances.size(); ++idx) {
                    data->instancesView[idx] = instances[idx];
                }
            }
        }
        renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
#endif
    }
}

void RenderCommandList::CopyAccelerationStructureInstances(
    const BufferOffset& destination, const array_view<const AsInstance> instances)
{
    if (RenderHandleUtil::IsGpuBuffer(destination.handle) && (!instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        AddBarrierPoint(RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES);

        RenderCommandCopyAccelerationStructureInstances* data =
            AllocateRenderCommand<RenderCommandCopyAccelerationStructureInstances>(allocator_);
        if (!data) {
            return; // early out
        }

        data->destination = destination;
        data->instancesData = static_cast<AsInstance*>(
            AllocateRenderData(allocator_, std::alignment_of<AsInstance>(), sizeof(AsInstance) * instances.size()));
        if (data->instancesData) {
            data->instancesView = { data->instancesData, instances.size() };
            for (size_t idx = 0; idx < data->instancesView.size(); ++idx) {
                data->instancesView[idx] = instances[idx];
            }
            renderCommands_.push_back({ RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES, data });
        }
#endif
    }
}

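// Clears a color image outside of a render pass. The image must have been created with
// CORE_IMAGE_USAGE_TRANSFER_DST_BIT usage; validation logs an error for a missing flag.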
void RenderCommandList::ClearColorImage(
    const RenderHandle handle, const ClearColorValue color, const array_view<const ImageSubresourceRange> ranges)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    {
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            PLUGIN_LOG_W("RENDER_VALIDATION: Invalid image handle given to ClearColorImage");
        }
        if (ranges.empty()) {
            PLUGIN_LOG_W("RENDER_VALIDATION: Invalid ranges given to ClearColorImage");
        }
        {
            const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(handle);
            if ((desc.usageFlags & CORE_IMAGE_USAGE_TRANSFER_DST_BIT) == 0) {
                PLUGIN_LOG_E("RENDER_VALIDATION: Image missing usage flag TRANSFER_DST for ClearColorImage command");
            }
        }
    }
#endif
    if (RenderHandleUtil::IsGpuImage(handle) && (!ranges.empty())) {
        AddBarrierPoint(RenderCommandType::CLEAR_COLOR_IMAGE);

        auto* data = AllocateRenderCommand<RenderCommandClearColorImage>(allocator_);
        if (data) {
            data->handle = handle;
            data->imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            data->color = color;
            data->ranges = { AllocateRenderData<ImageSubresourceRange>(
                                 allocator_, static_cast<uint32_t>(ranges.size())),
                ranges.size() };
            if (!data->ranges.data()) {
                return;
            }
            CloneData(data->ranges.data(), data->ranges.size_bytes(), ranges.data(), ranges.size_bytes());

            renderCommands_.push_back({ RenderCommandType::CLEAR_COLOR_IMAGE, data });
        }
    }
}

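// The SetDynamicState* methods below record dynamic pipeline state commands. As in typical
// graphics APIs, the values take effect only when the bound PSO declares the corresponding
// dynamic state (an assumption; enforcement is not visible in this file).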
void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateViewport(nodeName_, viewportDesc);
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
    if (data) {
        data->viewportDesc = viewportDesc;
        // clamp viewport dimensions to at least 1.0
        data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
        data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
    }
}

void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateScissor(nodeName_, scissorDesc);
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
    if (data) {
        data->scissorDesc = scissorDesc;
        // clamp scissor extents to at least 1x1
        data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
        data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
    }
}

void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
    if (data) {
        data->lineWidth = lineWidth;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBias(
    const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
    if (data) {
        data->depthBiasConstantFactor = depthBiasConstantFactor;
        data->depthBiasClamp = depthBiasClamp;
        data->depthBiasSlopeFactor = depthBiasSlopeFactor;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
    }
}

void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
{
    constexpr uint32_t THRESHOLD = 4;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (blendConstants.size() > THRESHOLD) {
        PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
            THRESHOLD);
    }
#endif
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
    if (data) {
        *data = {};
        const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), THRESHOLD);
        for (uint32_t idx = 0; idx < bcCount; ++idx) {
            data->blendConstants[idx] = blendConstants[idx];
        }
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
    if (data) {
        data->minDepthBounds = minDepthBounds;
        data->maxDepthBounds = maxDepthBounds;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
    }
}

void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::COMPARE_MASK;
        data->faceMask = faceMask;
        data->mask = compareMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::WRITE_MASK;
        data->faceMask = faceMask;
        data->mask = writeMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::REFERENCE;
        data->faceMask = faceMask;
        data->mask = reference;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

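// Fragment shading rate: requested fragment sizes are mapped to the valid values 1, 2, and 4
// (0 -> 1, 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4; larger values clamp to 4).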
void RenderCommandList::SetDynamicStateFragmentShadingRate(
    const Size2D& fragmentSize, const FragmentShadingRateCombinerOps& combinerOps)
{
    auto* data = AllocateRenderCommand<RenderCommandDynamicStateFragmentShadingRate>(allocator_);
    if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateFragmentShadingRate(fragmentSize);
#endif
        // valid fragment sizes are 1, 2, and 4; map input values 0-4 and clamp larger ones
        constexpr uint32_t maxValue { 4u };
        constexpr uint32_t valueMapper[maxValue + 1u] = { 1u, 1u, 2u, 2u, 4u };
        Size2D fs = fragmentSize;
        fs.width = (fs.width <= maxValue) ? valueMapper[fs.width] : maxValue;
        fs.height = (fs.height <= maxValue) ? valueMapper[fs.height] : maxValue;

        data->fragmentSize = fs;
        data->combinerOps = combinerOps;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE, data });
    }
}

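// Deferred backend execution: SetExecuteBackendFramePosition() marks a single per-frame
// position for custom backend work, while SetExecuteBackendCommand() and SetBackendCommand()
// attach user-provided backend command objects.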
void RenderCommandList::SetExecuteBackendFramePosition()
{
    if (!stateData_.executeBackendFrameSet) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        auto* data = AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            data->command = nullptr;
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
            stateData_.executeBackendFrameSet = true;
        }
    } else {
        PLUGIN_LOG_W("RenderCommandList: there can be only one SetExecuteBackendFramePosition() call per frame");
    }
}

void RenderCommandList::SetExecuteBackendCommand(IRenderBackendCommand::Ptr backendCommand)
{
    if (backendCommand) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        auto* data = AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            data->command = backendCommand.get();
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });

            backendCommands_.push_back(backendCommand);
        }
    }
}

void RenderCommandList::SetBackendCommand(
    IRenderBackendPositionCommand::Ptr backendCommand, RenderBackendCommandPosition backendCommandPosition)
{
    if (backendCommand) {
        processBackendCommands_.push_back({ backendCommand, backendCommandPosition });
    }
}

array_view<ProcessBackendCommand> RenderCommandList::GetProcessBackendCommands()
{
    return processBackendCommands_;
}

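// Debug markers record nested begin/end ranges for graphics debuggers and are compiled in
// only when RENDER_DEBUG_MARKERS_ENABLED is set. Illustrative usage sketch:
//     cmdList.BeginDebugMarker("MyPass");
//     // ... draw / dispatch commands ...
//     cmdList.EndDebugMarker();
// debugMarkerStack_ tracks nesting, so unbalanced EndDebugMarker() calls are ignored.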
void RenderCommandList::BeginDebugMarker(const BASE_NS::string_view name, const BASE_NS::Math::Vec4 color)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (!name.empty()) {
        RenderCommandBeginDebugMarker* data = AllocateRenderCommand<RenderCommandBeginDebugMarker>(allocator_);
        if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
            if (name.size() > RenderCommandBeginDebugMarker::SIZE_OF_NAME) {
                PLUGIN_LOG_W("RENDER_VALIDATION: Debug marker name larger than (%u)",
                    RenderCommandBeginDebugMarker::SIZE_OF_NAME);
            }
#endif
            data->name = name;
            data->color = { color };
            renderCommands_.push_back({ RenderCommandType::BEGIN_DEBUG_MARKER, data });
            debugMarkerStack_.stackCounter++;
            debugMarkerStack_.commandCount++;
        }
    }
#endif
}

void RenderCommandList::BeginDebugMarker(const BASE_NS::string_view name)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    BeginDebugMarker(name, { 1.0f, 1.0f, 1.0f, 1.0f });
#endif
}

void RenderCommandList::EndDebugMarker()
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (debugMarkerStack_.stackCounter > 0U) {
        RenderCommandEndDebugMarker* data = AllocateRenderCommand<RenderCommandEndDebugMarker>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::END_DEBUG_MARKER, data });
            debugMarkerStack_.stackCounter--;
            debugMarkerStack_.commandCount++;
        }
    }
#endif
}

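// Internal validation helpers: on failure they mark the whole command list invalid via
// stateData_.validCommandList.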
void RenderCommandList::ValidateRenderPass(const RenderPassDesc& renderPassDesc)
{
    if (stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_hasbegun_",
            "RenderCommandList: render pass is active, needs to end before starting a new one (node: %s)",
            nodeName_.c_str());
#endif
        stateData_.validCommandList = false;
    }
    // validate render pass attachments
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        if (!RenderHandleUtil::IsValid(renderPassDesc.attachmentHandles[idx])) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_attachments_",
                "RenderCommandList: Invalid render pass attachment handle in index: %u (node:%s)", idx,
                nodeName_.c_str());
#endif
            stateData_.validCommandList = false;
        }
    }
}

void RenderCommandList::ValidatePipeline()
{
    if (!stateData_.validPso) {
        stateData_.validCommandList = false;
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidatePipeline_", "RenderCommandList: PSO not bound.");
#endif
    }
}

void RenderCommandList::ValidatePipelineLayout()
{
    if (stateData_.checkBindPipelineLayout) {
        stateData_.checkBindPipelineLayout = false;
        // fast check without validation
        const uint32_t pipelineLayoutSetsMask =
            RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
        if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderCommandList::ValidatePipelineLayout", "RenderCommandList: not all needed descriptor sets bound");
#endif
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
        const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
                                       ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
                                       : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
        uint32_t plDescriptorSetCount = 0U;
        uint32_t bindCount = 0U;
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
            if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
                bindCount++;
            }
            if (pl.descriptorSetLayouts[idx].set != PipelineLayoutConstants::INVALID_INDEX) {
                plDescriptorSetCount++;
            }
        }
        if (bindCount < plDescriptorSetCount) {
            PLUGIN_LOG_ONCE_E(nodeName_ + "not_all_pl_bound",
                "RENDER_VALIDATION: not all pipeline layout required descriptor sets bound");
        }
#endif
    }
}

const CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid) const
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid)
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

void RenderCommandList::Ref() {}

void RenderCommandList::Unref() {}
RENDER_END_NAMESPACE()