/*
 * Copyright (C) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_command_list.h"

#include <cinttypes>
#include <cstdint>

#include <base/containers/array_view.h>
#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_command_list.h>
#include <render/render_data_structures.h>

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_node_context_manager.h"
#include "util/linear_allocator.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
#if (RENDER_VALIDATION_ENABLED == 1)
void ValidateImageUsageFlags(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageUsageFlags imageUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetImageDescriptor(handle).usageFlags & imageUsageFlags) == 0) {
        PLUGIN_LOG_E("RENDER_VALIDATION: gpu image (handle: %" PRIu64
                     ") (name: %s), not created with needed flags: %s",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data());
    }
}

void ValidateBufferUsageFlags(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const BufferUsageFlags bufferUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetBufferDescriptor(handle).usageFlags & bufferUsageFlags) == 0) {
        PLUGIN_LOG_E("RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
                     ") (name: %s), not created with needed flags: %s",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data());
    }
}

void ValidateDescriptorTypeBinding(
    const GpuResourceManager& gpuResourceMgr, const DescriptorSetLayoutBindingResources& bindingRes)
{
    for (const auto& ref : bindingRes.buffers) {
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
                "CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
                "CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
            ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
            // valid descriptor type, no buffer usage flag validation needed
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.images) {
        if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
            (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
            ValidateImageUsageFlags(
                gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT, "CORE_IMAGE_USAGE_SAMPLED_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            ValidateImageUsageFlags(
                gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT, "CORE_IMAGE_USAGE_STORAGE_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
            ValidateImageUsageFlags(gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
                "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.samplers) {
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
            // valid descriptor type, nothing to validate for plain samplers
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
        }
    }
}

void ValidateRenderPassAttachment(const GpuResourceManager& gpuResourceMgr, const RenderPassDesc& renderPassDsc)
{
    const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDsc.attachmentHandles[0]);
    const uint32_t baseWidth = baseDesc.width;
    const uint32_t baseHeight = baseDesc.height;
    for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDsc.attachmentCount; ++attachmentIdx) {
        const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDsc.attachmentHandles[attachmentIdx]);
        if (desc.width != baseWidth || desc.height != baseHeight) {
            PLUGIN_LOG_E("RENDER_VALIDATION: render pass attachment size does not match with attachment index: %u",
                attachmentIdx);
            PLUGIN_LOG_E("RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
                baseHeight, desc.width, desc.height);
        }
    }
    if ((renderPassDsc.renderArea.extentWidth == 0) || (renderPassDsc.renderArea.extentHeight == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u)",
            renderPassDsc.renderArea.extentWidth, renderPassDsc.renderArea.extentHeight);
    }
    if ((renderPassDsc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
        (renderPassDsc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
        PLUGIN_LOG_E(
            "RENDER_VALIDATION: render area offset cannot go out of screen (offsetX: %i, offsetY: %i) "
            "(baseWidth: %u, baseHeight: %u)",
            renderPassDsc.renderArea.offsetX, renderPassDsc.renderArea.offsetY, baseWidth, baseHeight);
    }
}

void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageSubresourceRange& imageSubresourceRange)
{
    const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
    if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseMipLevel: %u, is greater than or equal to "
                     "mipCount: %u",
            imageSubresourceRange.baseMipLevel, desc.mipCount);
    }
    if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseArrayLayer: %u, is greater than or equal to "
                     "layerCount: %u",
            imageSubresourceRange.baseArrayLayer, desc.layerCount);
    }
}

void ValidateViewport(const ViewportDesc& vd)
{
    if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
        PLUGIN_LOG_E(
            "RENDER_VALIDATION: viewport width (%f) and height (%f) must be one or larger", vd.width, vd.height);
    }
}

void ValidateScissor(const ScissorDesc& sd)
{
    if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: scissor extentWidth (%u) and scissor extentHeight (%u) cannot be zero",
            sd.extentWidth, sd.extentHeight);
    }
}
#endif

constexpr size_t MEMORY_ALIGNMENT { 16 };
constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barriers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };

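// Rounds byteSize up to the next multiple of alignment. Assumes a power-of-two
// alignment, which holds for the constants above (e.g. 100 bytes rounded with
// BYTE_SIZE_ALIGNMENT (64) yields 128).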
size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

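// Allocates byteSz bytes from the newest linear allocator. When that allocator
// is exhausted, a new one is appended whose capacity is at least
// MIN_ALLOCATION_SIZE, at least the aligned request, and at least twice the
// previous allocator's size, keeping the allocator count small.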
void* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, const size_t byteSz)
{
    PLUGIN_ASSERT(byteSz > 0);
    void* rc = nullptr;
    if (!allocator.allocators.empty()) {
        const size_t currentIndex = allocator.allocators.size() - 1;
        rc = allocator.allocators[currentIndex]->Allocate(byteSz);
    }

    if (rc == nullptr) { // current allocator is out of memory
        size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
        const size_t currentIndex = allocator.allocators.size();
        if (currentIndex > 0) {
            allocatorByteSize =
                Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
        }
        allocator.allocators.emplace_back(make_unique<LinearAllocator>(allocatorByteSize, MEMORY_ALIGNMENT));

        rc = allocator.allocators[currentIndex]->Allocate(byteSz);
        if (rc == nullptr) {
            PLUGIN_LOG_E("RenderCommandList: render command list allocation: out of memory");
            PLUGIN_ASSERT(false);
        }
    }
    return rc;
}

template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, sizeof(T) * count));
}

template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, sizeof(T)));
}
} // namespace

RenderCommandList::RenderCommandList(NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr,
    const GpuResourceManager& gpuResourceMgr, const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue,
    const bool enableMultiQueue)
    : IRenderCommandList(),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}
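// Called at the start of every frame before recording. If more than one linear
// allocator was needed last frame, their capacities are combined into a single
// allocation (plus roughly 1/FRAME_RESERVE_EXTRA_DIVIDE of headroom) so that
// the coming frame can be served from one block.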
void RenderCommandList::BeginFrame()
{
    if (allocator_.allocators.size() == 1) { // size is good for this frame
        allocator_.allocators[0]->Reset();
    } else if (allocator_.allocators.size() > 1) {
        size_t fullByteSize = 0;
        size_t alignment = 0;
        for (auto& ref : allocator_.allocators) {
            fullByteSize += ref->GetCurrentByteSize();
            alignment = Math::max(alignment, (size_t)ref->GetAlignment());
            ref.reset();
        }
        allocator_.allocators.clear();

        // add some room for current frame allocation for new render commands
        const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
        fullByteSize += extraBytes;

        // create new single allocation for combined previous size and some extra bytes
        const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
        allocator_.allocators.emplace_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
    }

    ResetStateData();

    const auto clearAndReserve = [](auto& vec) {
        const size_t count = vec.size();
        vec.clear();
        vec.reserve(count);
    };

    clearAndReserve(renderCommands_);
    clearAndReserve(customBarriers_);
    clearAndReserve(vertexInputBufferBarriers_);
    clearAndReserve(descriptorSetHandlesForBarriers_);

    validReleaseAcquire_ = false;
    hasMultiRenderCommandListSubpasses_ = false;
    multiRendercommandListSubpassCount_ = 1;
}

void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
{
    if (enableMultiQueue_) {
        validReleaseAcquire_ = true;
    }
}

void RenderCommandList::BeforeRenderNodeExecuteFrame()
{
    // add possible barrier point for gpu queue transfer acquire
    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
    }
}

void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarriers() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }
}

array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid state data in render command list");
    }
#endif

    return array_view<const RenderCommandWithType>(renderCommands_.data(), renderCommands_.size());
}

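// With multi-queue enabled the list always contains the automatically added
// acquire and release barrier commands; it needs backend execution only when
// there are more commands than those two, or when explicit release/acquire
// resource barriers were patched in (validReleaseAcquire_).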
bool RenderCommandList::HasValidRenderCommands() const
{
    const uint32_t renderCommandCount = GetRenderCommandCount();
    bool valid = false;
    if (enableMultiQueue_) {
        if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
            // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
            valid = validReleaseAcquire_;
        } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
            valid = true;
        }
    } else {
        valid = (renderCommandCount > 0);
    }
    valid = valid && stateData_.validCommandList;

    return valid;
}

uint32_t RenderCommandList::GetRenderCommandCount() const
{
    return (uint32_t)renderCommands_.size();
}

GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}

bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRenderCommandListSubpasses_;
}

uint32_t RenderCommandList::GetMultiRenderCommandListSubpassCount() const
{
    return multiRendercommandListSubpassCount_;
}

array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
{
    return array_view<const CommandBarrier>(customBarriers_.data(), customBarriers_.size());
}

array_view<const VertexBuffer> RenderCommandList::GetVertexInputBufferBarriers() const
{
    return array_view<const VertexBuffer>(vertexInputBufferBarriers_.data(), vertexInputBufferBarriers_.size());
}

array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
{
    return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
}

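// Records a RenderCommandBarrierPoint into the command stream. The render
// graph later fills each barrier point with the concrete resource barriers
// collected up to it: descriptor sets with dynamic barrier resources,
// vertex/index buffer barriers, and any pending custom barriers.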
void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    RenderCommandBarrierPoint* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (data) {
        *data = {}; // zero initialize

        data->renderCommandType = renderCommandType;
        data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

        // update new index (within render pass there might not be any dirty descriptor sets at this stage)
        const uint32_t descriptorSetBeginIndex = (uint32_t)descriptorSetHandlesForBarriers_.size();
        data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
        data->descriptorSetHandleCount = 0;
        // update new index (only valid with render pass)
        data->vertexIndexBarrierIndexBegin = (uint32_t)vertexInputBufferBarriers_.size();
        data->vertexIndexBarrierCount = 0;

        // barriers are always needed e.g. when a dynamic resource is bound for writing in multiple dispatches
        const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                          renderCommandType == RenderCommandType::DISPATCH ||
                                          renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
        if (handleDescriptorSets) {
            stateData_.dirtyDescriptorSetsForBarriers = false;
            for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
                // only add descriptor set handles for barriers if there are dynamic barrier resources
                if (stateData_.currentBoundSets[idx].hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.emplace_back(stateData_.currentBoundSets[idx].descriptorSetHandle);
                }
            }
            data->descriptorSetHandleCount =
                (uint32_t)descriptorSetHandlesForBarriers_.size() - descriptorSetBeginIndex;
        }

        const bool handleCustomBarriers =
            ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
        if (handleCustomBarriers) {
            const int32_t newCount = (int32_t)customBarriers_.size() - stateData_.currentCustomBarrierIndices.prevSize;
            if (newCount > 0) {
                data->customBarrierIndexBegin = (uint32_t)stateData_.currentCustomBarrierIndices.prevSize;
                data->customBarrierCount = (uint32_t)newCount;

                stateData_.currentCustomBarrierIndices.prevSize = (int32_t)customBarriers_.size();
                stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
            }
        }

        // store current barrier point for render command list
        // * binding descriptor sets (with dynamic barrier resources)
        // * binding vertex and index buffers (with dynamic barrier resources)
        // inside a render pass adds barriers directly to the RenderCommandBarrierPoint behind this pointer
        stateData_.currentBarrierPoint = data;

        renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
    }
}

void RenderCommandList::Draw(
    const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
#endif

    if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW;
            data->vertexCount = vertexCount;
            data->instanceCount = instanceCount;
            data->firstVertex = firstVertex;
            data->firstInstance = firstInstance;
            data->indexCount = 0;
            data->firstIndex = 0;
            data->vertexOffset = 0;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
    const int32_t vertexOffset, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
#endif

    if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED;
            data->vertexCount = 0;
            data->instanceCount = instanceCount;
            data->firstVertex = 0;
            data->firstInstance = firstInstance;
            data->indexCount = indexCount;
            data->firstIndex = firstIndex;
            data->vertexOffset = vertexOffset;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
#endif

    if (stateData_.renderPassHasBegun) {
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::DrawIndexedIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
#endif

    if (stateData_.renderPassHasBegun) {
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
{
    if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
        ValidatePipeline();
        ValidatePipelineLayout();

        AddBarrierPoint(RenderCommandType::DISPATCH);

        RenderCommandDispatch* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
        if (data) {
            data->groupCountX = groupCountX;
            data->groupCountY = groupCountY;
            data->groupCountZ = groupCountZ;

            renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
        }
    }
}

void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
{
    ValidatePipeline();
    ValidatePipelineLayout();

    AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);

    RenderCommandDispatchIndirect* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
    if (data) {
        data->argsHandle = bufferHandle;
        data->offset = offset;

        renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
    }
}

void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    if (stateData_.currentPsoHandle.id == psoHandle.id) {
        return; // early out
    }

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_E("RENDER_VALIDATION: graphics pipeline must be bound after render pass begin");
        }
    }
#endif

    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    RenderCommandBindPipeline* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}

void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    ValidatePipeline();

    // push constant is not used/allocated if byte size is bigger than supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && data) {
        RenderCommandPushConstant* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        uint8_t* pushData = static_cast<uint8_t*>(AllocateRenderData(allocator_, pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            const bool res = CloneData(rc->data, pushConstant.byteSize, data, pushConstant.byteSize);
            PLUGIN_UNUSED(res);
            PLUGIN_ASSERT(res);

            renderCommands_.emplace_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be smaller than or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}

void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION: max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (!vertexBuffers.empty()) {
        RenderCommandBindVertexBuffers* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
        if (data) {
            VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
            uint32_t dynamicBarrierVertexBufferCount = 0;
            const uint32_t vertexBufferCount =
                Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, (uint32_t)vertexBuffers.size());
            data->vertexBufferCount = vertexBufferCount;
            RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
            for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
                data->vertexBuffers[idx] = vertexBuffers[idx];
                const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
                if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
                    (vertexBuffers[idx].byteSize > 0)) {
                    // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
                    dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
                        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
                    previousVbHandle = currVbHandle;
                }
            }

            // add possible vertex/index buffer barriers before render pass
            if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
                const size_t currCount = vertexInputBufferBarriers_.size();
                vertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
                for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
                    vertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
                }
            }

            renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
        }
    }
}

void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
{
    ValidatePipeline();

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
    }
#endif

    RenderCommandBindIndexBuffer* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
    if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
        data->indexBuffer = indexBuffer;
        if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
            vertexInputBufferBarriers_.push_back(
                { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
        }
        renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
    }
}
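// Two BeginRenderPass overloads follow: this one records a complete render
// pass whose subpasses all live in this command list; the overload after it
// starts recording at subpassStartIdx and is used when a render pass is split
// across multiple render command lists.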
void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
        PLUGIN_LOG_E(
            "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
            renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
        stateData_.validCommandList = false;
    }
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RenderCommandList: render pass is active, needs to be ended before starting a new one");
        stateData_.validCommandList = false;
    }
    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(gpuResourceMgr_, renderPassDesc);
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            PLUGIN_ASSERT(renderPassDesc.subpassCount == (uint32_t)subpassDescs.size());

            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
            data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
            data->subpassStartIndex = 0;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpasses.data()) {
                return;
            }

            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpassResourceStates.data()) {
                return;
            }

            CloneData(
                data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());

            for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
                const auto& subpassRef = subpassDescs[subpassIdx];

                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};

                ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("BeginRenderPass: render pass is active, needs to be ended before starting a new one");
        stateData_.validCommandList = false;
    }
    if (subpassStartIdx >= renderPassDesc.subpassCount) {
        PLUGIN_LOG_E(
            "RenderCommandList: BeginRenderPass: subpassStartIdx (%u) must be smaller than "
            "renderPassDesc.subpassCount (%u)",
            subpassStartIdx, renderPassDesc.subpassCount);
        stateData_.validCommandList = false;
    }

    if (hasMultiRenderCommandListSubpasses_) {
        PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
        stateData_.validCommandList = false;
    } else if (renderPassDesc.subpassCount > 1) {
        hasMultiRenderCommandListSubpasses_ = true;
        multiRendercommandListSubpassCount_ = renderPassDesc.subpassCount;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = subpassStartIdx;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(gpuResourceMgr_, renderPassDesc);
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->subpassStartIndex = subpassStartIdx;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpasses.data()) {
                return;
            }

            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if (!data->subpassResourceStates.data()) {
                return;
            }

            for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};
                data->subpasses[subpassIdx] = {};

                if (subpassIdx == subpassStartIdx) {
                    data->subpasses[subpassIdx] = subpassDesc;
                    ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                }
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}
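// The four Process* helpers below accumulate, per subpass, the access flags,
// pipeline stage flags, and expected image layout for each attachment. When
// the same attachment is used e.g. both as an input and as a color attachment
// within a subpass, its layout collapses to CORE_IMAGE_LAYOUT_GENERAL.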
void RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];

        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        // NOTE: mipLevel and layers are not updated to GpuResourceState
        // NOTE: validation needed for invalid handles
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
            subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
        } else {
            subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
                                                                 ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
                                                                 : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(gpuResourceMgr_, handle, ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
#endif
    }
}

void RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        subpassResourceStates.layouts[attachmentIndex] =
            (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
                ? CORE_IMAGE_LAYOUT_GENERAL
                : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        ValidateImageUsageFlags(gpuResourceMgr_, handle, ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif
    }
}

void RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        ValidateImageUsageFlags(gpuResourceMgr_, handle, ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif
    }
}

void RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    if (subpassRef.depthAttachmentCount == 1) {
        const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |=
            (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |=
            (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        ValidateImageUsageFlags(gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif
    }
    if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
        const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        ValidateImageUsageFlags(gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif
    }
}

void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
{
    RenderCommandNextSubpass* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
    if (data) {
        data->subpassContents = subpassContents;
        data->renderCommandListIndex = 0; // will be updated in the render graph

        renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
    }
}

void RenderCommandList::EndRenderPass()
{
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RenderCommandList: render pass needs to begin before calling end");
        stateData_.validCommandList = false;
    }

    RenderCommandEndRenderPass* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
    if (data) {
        // will be updated in render graph if multi render command list render pass
        data->endType = RenderPassEndType::END_RENDER_PASS;
        data->subpassStartIndex = stateData_.renderPassStartIndex;
        data->subpassCount = stateData_.renderPassSubpassCount;

        renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
    }

    stateData_.renderPassHasBegun = false;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = 0;
}

void RenderCommandList::BeginDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);

    // barrier point for pending barriers
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = false;
}

void RenderCommandList::EndDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);

    stateData_.automaticBarriersEnabled = true;
}

void RenderCommandList::AddCustomBarrierPoint()
{
    const bool barrierState = stateData_.automaticBarriersEnabled;
    stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = barrierState;
}
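
// A minimal usage sketch for explicit barrier recording through the
// IRenderCommandList interface (cmdList, src, and dst are hypothetical):
//   cmdList.BeginDisableAutomaticBarrierPoints();
//   cmdList.CustomMemoryBarrier(src, dst); // record explicit barriers
//   cmdList.AddCustomBarrierPoint();       // flush them at this point
//   cmdList.EndDisableAutomaticBarrierPoints();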

void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
#endif

    CommandBarrier cb {
        RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
        {
            source.accessFlags,
            source.pipelineStageFlags,
        },
        {},
        {
            destination.accessFlags,
            destination.pipelineStageFlags,
        },
        {},
    };

    customBarriers_.emplace_back(std::move(cb));

    stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
}

void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
    const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (byteSize == 0) {
        PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
            "RENDER_VALIDATION: do not create zero size custom buffer barriers");
    }
    if (handleType != RenderHandleType::GPU_BUFFER) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
    }
#endif

    if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalByteOffset = byteOffset;
        src.optionalByteSize = byteSize;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalByteOffset = byteOffset;
        dst.optionalByteSize = byteSize;

        CommandBarrier cb {
            handle,
            std::move(src),
            {},
            std::move(dst),
            {},
        };

        customBarriers_.emplace_back(std::move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
    const ImageSubresourceRange& imageSubresourceRange)
{
    // specific layout MAX_ENUM to state that we fetch the correct state
    ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
    CustomImageBarrier(handle, source, destination, imageSubresourceRange);
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
    const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (handleType != RenderHandleType::GPU_IMAGE) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
    }
    ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
#endif

    if (handleType == RenderHandleType::GPU_IMAGE) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalImageLayout = source.imageLayout;
        src.optionalImageSubresourceRange = imageSubresourceRange;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalImageLayout = destination.imageLayout;
        dst.optionalImageSubresourceRange = imageSubresourceRange;

        CommandBarrier cb {
            handle,
            std::move(src),
            {},
            std::move(dst),
            {},
        };

        customBarriers_.emplace_back(std::move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}
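// The copy and blit commands below add a barrier point before the command when
// either resource is a dynamic (frame-tracked) resource, so the render graph
// can emit the needed transitions for the transfer.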
CopyBufferToBuffer(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const BufferCopy & bufferCopy)1181 void RenderCommandList::CopyBufferToBuffer(
1182 const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
1183 {
1184 if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
1185 // NOTE: combine copies, and only single combined barrier?
1186 if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1187 RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1188 AddBarrierPoint(RenderCommandType::COPY_BUFFER);
1189 }
1190
1191 RenderCommandCopyBuffer* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
1192 if (data) {
1193 data->srcHandle = sourceHandle;
1194 data->dstHandle = destinationHandle;
1195 data->bufferCopy = bufferCopy;
1196
1197 renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
1198 }
1199 } else {
1200 PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
1201 }
1202 }
1203
CopyBufferToImage(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const BufferImageCopy & bufferImageCopy)1204 void RenderCommandList::CopyBufferToImage(
1205 const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
1206 {
1207 if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
1208 // NOTE: combine copies, and only single combined barrier?
1209 if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1210 RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1211 AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
1212 }
1213
1214 RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
1215 if (data) {
1216 data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
1217 data->srcHandle = sourceHandle;
1218 data->dstHandle = destinationHandle;
1219 data->bufferImageCopy = bufferImageCopy;
1220
1221 renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
1222 }
1223 } else {
1224 PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
1225 }
1226 }
1227
CopyImageToBuffer(const RenderHandle sourceHandle,const RenderHandle destinationHandle,const BufferImageCopy & bufferImageCopy)1228 void RenderCommandList::CopyImageToBuffer(
1229 const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
1230 {
1231 if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
1232 // NOTE: combine copies, and only single combined barrier?
1233 if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
1234 RenderHandleUtil::IsDynamicResource(destinationHandle)) {
1235 AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
1236 }
1237
1238 RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
1239 if (data) {
1240 data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
1241 data->srcHandle = sourceHandle;
1242 data->dstHandle = destinationHandle;
1243 data->bufferImageCopy = bufferImageCopy;
1244
1245 renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
1246 }
1247 } else {
1248 PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
1249 }
1250 }
1251
void RenderCommandList::CopyImageToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and use only a single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_IMAGE);
        }

        RenderCommandCopyImage* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->imageCopy = imageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
    }
}

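// Illustrative usage sketch: copying one mip level between two images with
// compatible formats. ImageCopy is assumed to carry Vulkan-style subresource,
// offset, and extent members; the exact field names are assumptions.
//   ImageCopy copy {};                    // default subresources, zero offsets
//   copy.extent = { width, height, 1u };  // hypothetical extent field
//   cmdList.CopyImageToImage(srcImageHandle, dstImageHandle, copy);
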
void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
    const ImageBlit& imageBlit, const Filter filter)
{
    if (!stateData_.renderPassHasBegun) {
        if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
            if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
                RenderHandleUtil::IsDynamicResource(destinationHandle)) {
                AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
            }

            RenderCommandBlitImage* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
            if (data) {
                data->srcHandle = sourceHandle;
                data->dstHandle = destinationHandle;
                data->imageBlit = imageBlit;
                data->filter = filter;
                // NOTE: desired layouts (the barrier point needs to respect these)
                data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
                data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

                renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
            }
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of a render pass");
    }
}

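// Illustrative usage sketch: unlike the straight copies above, a blit may scale
// and filter, which makes it a common tool for mipmap generation, and it must be
// recorded outside a render pass. The ImageBlit region fields and the filter
// enumerator name are assumptions here.
//   ImageBlit blit {}; // src/dst subresources and corner offsets (assumed fields)
//   cmdList.BlitImage(srcImageHandle, dstImageHandle, blit, CORE_FILTER_LINEAR);
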
void RenderCommandList::UpdateDescriptorSet(
    const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateDescriptorTypeBinding(gpuResourceMgr_, bindingResources);
    if (bindingResources.bindingMask != bindingResources.descriptorSetBindingMask) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid bindings in descriptor set update");
    }
#endif
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
    if (handleType == RenderHandleType::DESCRIPTOR_SET) {
        nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handle, bindingResources, gpuQueue_);
        RenderCommandUpdateDescriptorSets* data = AllocateRenderCommand<RenderCommandUpdateDescriptorSets>(allocator_);
        if (data) {
            *data = {}; // default
            data->descriptorSetHandles[0] = handle;

            renderCommands_.push_back({ RenderCommandType::UPDATE_DESCRIPTOR_SETS, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
    }
}

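// Illustrative flow sketch: a descriptor set is typically filled through a binder
// object and then updated once per frame before binding. The binder and its
// method names below are assumptions for illustration; only UpdateDescriptorSet
// itself is defined in this file.
//   binder->ClearBindings();
//   binder->BindBuffer(0u, uboHandle, 0u); // hypothetical helper
//   cmdList.UpdateDescriptorSet(
//       binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
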
void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles,
    const array_view<const uint32_t> dynamicOffsets)
{
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(handles.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RenderCommandList::BindDescriptorSets: firstSet + handles.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if ((handles.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) ||
        (dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
#endif

    if (auto* data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
        *data = {}; // default

        if (!dynamicOffsets.empty()) {
            if (auto* doData = AllocateRenderData(allocator_, dynamicOffsets.size() * sizeof(uint32_t)); doData) {
                data->dynamicOffsets = reinterpret_cast<uint32_t*>(doData);
                data->dynamicOffsetCount = static_cast<uint32_t>(dynamicOffsets.size());
                CloneData(data->dynamicOffsets, dynamicOffsets.size_bytes(), dynamicOffsets.data(),
                    dynamicOffsets.size_bytes());
            }
        }

        data->psoHandle = stateData_.currentPsoHandle;
        data->firstSet = firstSet;
        data->setCount = static_cast<uint32_t>(handles.size());

        uint32_t descriptorSetCounterForBarriers = 0;
        uint32_t currSet = firstSet;
        for (const RenderHandle currHandle : handles) {
            PLUGIN_ASSERT(currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
            data->descriptorSetHandles[currSet] = currHandle;

            const bool hasDynamicBarrierResources =
                nodeContextDescriptorSetManager_.HasDynamicBarrierResources(currHandle);
            if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                descriptorSetHandlesForBarriers_.emplace_back(currHandle);
                descriptorSetCounterForBarriers++;
            }
            stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
            stateData_.currentBoundSets[currSet].descriptorSetHandle = currHandle;
            stateData_.currentBoundSetsMask |= (1u << currSet);
            ++currSet;
        }

        renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });

        if (stateData_.renderPassHasBegun) { // add possible barriers before render pass
            PLUGIN_ASSERT(stateData_.currentBarrierPoint);
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}

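// Illustrative usage sketch: binding two consecutive sets where one contains a
// dynamic uniform buffer. The handle and variable names are assumptions for
// illustration; array_view is assumed constructible from a C array here.
//   const RenderHandle sets[] = { globalSetHandle, materialSetHandle };
//   const uint32_t dynamicOffsets[] = { frameIndex * alignedUboSize };
//   cmdList.BindDescriptorSets(0u, sets, dynamicOffsets);
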
void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
{
    BindDescriptorSets(firstSet, handles, {});
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
{
    BindDescriptorSets(set, array_view<const RenderHandle>(&handle, 1), {});
}

void RenderCommandList::BindDescriptorSet(
    const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
{
    BindDescriptorSets(set, array_view<const RenderHandle>(&handle, 1), dynamicOffsets);
}

void RenderCommandList::BuildAccelerationStructures(const AccelerationStructureBuildGeometryData& geometry,
    const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,
    const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,
    const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)
{
    if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        RenderCommandBuildAccelerationStructure* data =
            AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
        if (data) {
            data->type = geometry.info.type;
            data->flags = geometry.info.flags;
            data->mode = geometry.info.mode;
            data->srcAccelerationStructure = geometry.srcAccelerationStructure;
            data->dstAccelerationStructure = geometry.dstAccelerationStructure;
            data->scratchBuffer = geometry.scratchBuffer.handle;
            data->scratchOffset = geometry.scratchBuffer.offset;

            if (!triangles.empty()) {
                auto* trianglesData = static_cast<AccelerationStructureGeometryTrianglesData*>(AllocateRenderData(
                    allocator_, sizeof(AccelerationStructureGeometryTrianglesData) * triangles.size()));
                if (trianglesData) { // guard against allocation failure before writing through the view
                    data->trianglesData = trianglesData;
                    data->trianglesView = { data->trianglesData, triangles.size() };
                    for (size_t idx = 0; idx < triangles.size(); ++idx) {
                        data->trianglesView[idx] = triangles[idx];
                    }
                }
            }
            if (!aabbs.empty()) {
                auto* aabbsData = static_cast<AccelerationStructureGeometryAabbsData*>(
                    AllocateRenderData(allocator_, sizeof(AccelerationStructureGeometryAabbsData) * aabbs.size()));
                if (aabbsData) {
                    data->aabbsData = aabbsData;
                    data->aabbsView = { data->aabbsData, aabbs.size() };
                    for (size_t idx = 0; idx < aabbs.size(); ++idx) {
                        data->aabbsView[idx] = aabbs[idx];
                    }
                }
            }
            if (!instances.empty()) {
                auto* instancesData = static_cast<AccelerationStructureGeometryInstancesData*>(AllocateRenderData(
                    allocator_, sizeof(AccelerationStructureGeometryInstancesData) * instances.size()));
                if (instancesData) {
                    data->instancesData = instancesData;
                    data->instancesView = { data->instancesData, instances.size() };
                    for (size_t idx = 0; idx < instances.size(); ++idx) {
                        data->instancesView[idx] = instances[idx];
                    }
                }
            }
            renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
        }
#endif
    }
}

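// Illustrative usage sketch (only effective when RENDER_VULKAN_RT_ENABLED == 1):
// building an acceleration structure from one triangle geometry. The struct
// contents are assumptions modeled on the Vulkan acceleration structure API;
// only the call shape below comes from this file.
//   AccelerationStructureBuildGeometryData geometry {}; // type/mode/flags, dst + scratch
//   AccelerationStructureGeometryTrianglesData tris {}; // vertex/index data (assumed fields)
//   cmdList.BuildAccelerationStructures(geometry, { &tris, 1u }, {}, {});
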
void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateViewport(viewportDesc);
#endif
    RenderCommandDynamicStateViewport* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
    if (data) {
        data->viewportDesc = viewportDesc;
        data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
        data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
    }
}

void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateScissor(scissorDesc);
#endif
    RenderCommandDynamicStateScissor* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
    if (data) {
        data->scissorDesc = scissorDesc;
        data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
        data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
    }
}

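// Illustrative usage sketch: viewport and scissor are usually set together right
// after beginning a render pass when the PSO declares them as dynamic states.
// Note that both commands clamp zero extents to 1 above. The aggregate member
// order used here is an assumption.
//   cmdList.SetDynamicStateViewport(ViewportDesc { 0.0f, 0.0f, fw, fh, 0.0f, 1.0f });
//   cmdList.SetDynamicStateScissor(ScissorDesc { 0, 0, w, h });
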
void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
{
    RenderCommandDynamicStateLineWidth* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
    if (data) {
        data->lineWidth = lineWidth;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBias(
    const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
{
    RenderCommandDynamicStateDepthBias* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
    if (data) {
        data->depthBiasConstantFactor = depthBiasConstantFactor;
        data->depthBiasClamp = depthBiasClamp;
        data->depthBiasSlopeFactor = depthBiasSlopeFactor;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
    }
}

void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
{
    constexpr uint32_t MAX_BLEND_CONSTANT_COUNT = 4;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (blendConstants.size() > MAX_BLEND_CONSTANT_COUNT) {
        PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
            MAX_BLEND_CONSTANT_COUNT);
    }
#endif
    RenderCommandDynamicStateBlendConstants* data =
        AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
    if (data) {
        *data = {};
        const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), MAX_BLEND_CONSTANT_COUNT);
        for (uint32_t idx = 0; idx < bcCount; ++idx) {
            data->blendConstants[idx] = blendConstants[idx];
        }
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
    }
}

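// Illustrative usage sketch: at most four blend constants (RGBA) are consumed;
// extra entries are clamped away by the Math::min above. An array_view is
// assumed constructible from a C array here.
//   const float constants[] = { 1.0f, 1.0f, 1.0f, 0.5f };
//   cmdList.SetDynamicStateBlendConstants(constants);
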
void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
{
    RenderCommandDynamicStateDepthBounds* data =
        AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
    if (data) {
        data->minDepthBounds = minDepthBounds;
        data->maxDepthBounds = maxDepthBounds;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
    }
}

void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::COMPARE_MASK;
        data->faceMask = faceMask;
        data->mask = compareMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::WRITE_MASK;
        data->faceMask = faceMask;
        data->mask = writeMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::REFERENCE;
        data->faceMask = faceMask;
        data->mask = reference; // the reference value shares the generic mask field
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

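// Illustrative usage sketch: the three stencil setters above share one command
// type, distinguished only by StencilDynamicState, and all store their value in
// the 'mask' field. The face-flag enumerator name below is an assumption.
//   cmdList.SetDynamicStateStencilCompareMask(CORE_STENCIL_FACE_FRONT_AND_BACK, 0xFFu);
//   cmdList.SetDynamicStateStencilWriteMask(CORE_STENCIL_FACE_FRONT_AND_BACK, 0xFFu);
//   cmdList.SetDynamicStateStencilReference(CORE_STENCIL_FACE_FRONT_AND_BACK, 0x01u);
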
void RenderCommandList::SetExecuteBackendFramePosition()
{
    if (!stateData_.executeBackendFrameSet) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        RenderCommandExecuteBackendFramePosition* data =
            AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
            stateData_.executeBackendFrameSet = true;
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: only one SetExecuteBackendFramePosition() call is allowed per frame");
    }
}

void RenderCommandList::ValidatePipeline()
{
    if (!stateData_.validPso) {
        stateData_.validCommandList = false;
        PLUGIN_LOG_E("RenderCommandList: pso not bound");
    }
}

void RenderCommandList::ValidatePipelineLayout()
{
    if (stateData_.checkBindPipelineLayout) {
        stateData_.checkBindPipelineLayout = false;
        // fast mask check without full validation
        const uint32_t pipelineLayoutSetsMask =
            RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
        if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
            PLUGIN_LOG_ONCE_E("RenderCommandList::ValidatePipelineLayout",
                "RenderCommandList: not all required descriptor sets are bound");
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
        const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
                                       ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
                                       : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
        const uint32_t plDescriptorSetCount = pl.descriptorSetCount;
        uint32_t bindCount = 0;
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
            if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
                bindCount++;
            }
        }
        if (bindCount < plDescriptorSetCount) {
            PLUGIN_LOG_E("RENDER_VALIDATION: not all pipeline layout required descriptor sets bound");
        }
#endif
    }
}

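// Worked example of the fast mask check above: if the pipeline layout requires
// sets 0 and 1, pipelineLayoutSetsMask is 0b0011. Binding only set 0 leaves
// currentBoundSetsMask at 0b0001, and (0b0001 & 0b0011) == 0b0001 != 0b0011,
// which triggers the "not all required descriptor sets" warning.
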
RENDER_END_NAMESPACE()