/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_graph.h"

#include <cinttypes>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/math/mathf.h>
#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_resource_cache.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
constexpr uint32_t INVALID_TRACK_IDX { ~0u };

#if (RENDER_DEV_ENABLED == 1)
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;

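// Logs the type of every recorded render command; compiled only into dev builds and
// enabled via the CORE_RENDER_GRAPH_FULL_DEBUG_PRINT constant above.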
void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
{
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
        switch (rc.type) {
            case RenderCommandType::DRAW: {
                PLUGIN_LOG_I("rc: Draw");
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                PLUGIN_LOG_I("rc: DrawIndirect");
                break;
            }
            case RenderCommandType::DISPATCH: {
                PLUGIN_LOG_I("rc: Dispatch");
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                PLUGIN_LOG_I("rc: DispatchIndirect");
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                PLUGIN_LOG_I("rc: BindPipeline");
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                PLUGIN_LOG_I("rc: BeginRenderPass");
                if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
                    const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
                    for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
                        const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
                        PLUGIN_LOG_I("    attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
                    }
                    PLUGIN_LOG_I("    subpass count: %u, subpass start idx: %u",
                        (uint32_t)beginRenderPass.renderPassDesc.subpassCount, beginRenderPass.subpassStartIndex);
                }
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                PLUGIN_LOG_I("rc: NextSubpass");
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                PLUGIN_LOG_I("rc: EndRenderPass");
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                PLUGIN_LOG_I("rc: BindVertexBuffers");
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                PLUGIN_LOG_I("rc: BindIndexBuffer");
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                PLUGIN_LOG_I("rc: CopyBuffer");
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                PLUGIN_LOG_I("rc: CopyBufferImage");
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                PLUGIN_LOG_I("rc: CopyImage");
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                PLUGIN_LOG_I("rc: BlitImage");
                break;
            }
            case RenderCommandType::BARRIER_POINT: {
                PLUGIN_LOG_I("rc: BarrierPoint");
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                PLUGIN_LOG_I("rc: BindDescriptorSets");
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                PLUGIN_LOG_I("rc: PushConstant");
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                PLUGIN_LOG_I("rc: BuildAccelerationStructure");
                break;
            }
            case RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES: {
                PLUGIN_LOG_I("rc: CopyAccelerationStructureInstances");
                break;
            }
            case RenderCommandType::CLEAR_COLOR_IMAGE: {
                PLUGIN_LOG_I("rc: ClearColorImage");
                break;
            }

            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                PLUGIN_LOG_I("rc: DynamicStateViewport");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                PLUGIN_LOG_I("rc: DynamicStateScissor");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
                PLUGIN_LOG_I("rc: DynamicStateLineWidth");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
                PLUGIN_LOG_I("rc: DynamicStateDepthBias");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
                PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
                PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_STENCIL: {
                PLUGIN_LOG_I("rc: DynamicStateStencil");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
                PLUGIN_LOG_I("rc: DynamicStateFragmentShadingRate");
                break;
            }
            case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
                PLUGIN_LOG_I("rc: ExecuteBackendFramePosition");
                break;
            }

            case RenderCommandType::WRITE_TIMESTAMP: {
                PLUGIN_LOG_I("rc: WriteTimestamp");
                break;
            }
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
                PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
                break;
            }
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
                PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
                break;
            }
            case RenderCommandType::BEGIN_DEBUG_MARKER: {
                PLUGIN_LOG_I("rc: BeginDebugMarker");
                break;
            }
            case RenderCommandType::END_DEBUG_MARKER: {
                PLUGIN_LOG_I("rc: EndDebugMarker");
                break;
            }
            case RenderCommandType::UNDEFINED:
            case RenderCommandType::COUNT: {
                PLUGIN_ASSERT(false && "invalid render command");
                break;
            }
        }
    }
}

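// Logs each combined barrier with its source and destination pipeline stages
// (and image layouts for image barriers) when resource state printing is enabled.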
void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
{
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        for (const auto& ref : combinedBarriers) {
            const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
            if (type == RenderHandleType::GPU_BUFFER) {
                PLUGIN_LOG_I("barrier buffer    :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
                    ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
                    ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags);
            } else {
                PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
                PLUGIN_LOG_I("barrier image     :: handle:0x%" PRIx64
                             " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
                    ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
                    ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags, ref.src.optionalImageLayout,
                    ref.dst.optionalImageLayout);
            }
        }
    }
}

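// Logs the initial and (later patched) final layout of every attachment of a render pass.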
void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
{
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
            const auto handle = rc.renderPassDesc.attachmentHandles[idx];
            const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
            const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
            PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64
                         " name:%s, src_layout:%u dst_layout:%u (patched later)",
                handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
        }
    }
}

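// Logs the tracked end-of-frame state of a single image: layout, handle index, and
// the generation counters of both the render handle and the GPU handle.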
void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
{
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        // NOTE: gpuHandle might be the same when generation index wraps around
        // and when using shallow handles (shadow -> re-use normal -> shadow -> re-use normal etc)
        const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
        PLUGIN_LOG_I("image_state   :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
            resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
            resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
            RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
            RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
        // one could fetch and print vulkan handle here as well e.g.
        // 1. const GpuImagePlatformDataVk& plat =
        // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
        // 3. PLUGIN_LOG_I("end_frame image   :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
    }
}
#endif // RENDER_DEV_ENABLED

constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                        CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                        CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
                                        CORE_ACCESS_MEMORY_WRITE_BIT;

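// Patches the final layout of every render pass attachment matching the given handle
// and stores the same layout into the tracked image state.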
void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
    RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
{
    const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
    for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
        if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
            beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
            storeState.resource.imageLayout = imageLayout;
        }
    }
}

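// Stitches render passes recorded into separate render command lists into one render pass:
// attachment load ops and initial layouts come from the first pass, store ops and final
// layouts from the last, subpass descriptions and resource states are synchronized across
// all lists, and subpasses flagged with CORE_SUBPASS_MERGE_BIT are collapsed (Vulkan only).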
void UpdateMultiRenderCommandListRenderPasses(Device& device, RenderGraph::MultiRenderPassStore& store)
{
    const auto renderPassCount = (uint32_t)store.renderPasses.size();
    PLUGIN_ASSERT(renderPassCount > 1);

    RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
    PLUGIN_ASSERT(firstRenderPass);
    PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
    const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
    PLUGIN_ASSERT(lastRenderPass);

    const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;

    // take attachment loads from the first render pass and stores from the last one
    // take initial layouts from the first render pass and final layouts from the last one (could take the next layout)
    // first store the correct render pass description in the first render pass, then copy it to the others
    // resource states are copied from valid subpasses to the other render command lists' subpasses
    for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
        const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
        const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
        const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
        for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
            if (fromRpIdx != toRpIdx) {
                auto& toRenderPass = *(store.renderPasses[toRpIdx]);
                auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
                for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
                    toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
                    toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
                }
            }
        }
    }

    for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
        firstRenderPass->renderPassDesc.attachments[idx].storeOp =
            lastRenderPass->renderPassDesc.attachments[idx].storeOp;
        firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
            lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;

        firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
            lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
    }

    // copy subpasses to the first render pass and mark if merging subpasses
    bool mergeSubpasses = false;
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        if ((idx < store.renderPasses.size()) && (idx < store.renderPasses[idx]->subpasses.size())) {
            firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
            if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
                mergeSubpasses = true;
            }
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        if ((idx >= store.renderPasses.size()) || (idx >= store.renderPasses[idx]->subpasses.size())) {
            PLUGIN_LOG_W("Invalid render pass subpass configuration for multi render pass");
        }
#endif
    }
    // NOTE: subpass merging is currently only used with the Vulkan backend
    if (device.GetBackendType() != DeviceBackendType::VULKAN) {
        mergeSubpasses = false;
    }

    uint32_t subpassCount = renderPassCount;
    if (mergeSubpasses) {
        PLUGIN_ASSERT(renderPassCount > 1U);
        // merge from back to front
        const uint32_t finalSubpass = renderPassCount - 1U;
        uint32_t mergeCount = 0U;
        for (uint32_t idx = finalSubpass; idx > 0U; --idx) {
            if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
                PLUGIN_ASSERT(idx > 0U);

                uint32_t prevSubpassIdx = idx - 1U;
                auto& currSubpass = firstRenderPass->subpasses[idx];
                auto& prevSubpass = firstRenderPass->subpasses[prevSubpassIdx];
                // cannot merge in these cases -> clear the merge bit so the merge is skipped below
                if (currSubpass.inputAttachmentCount != prevSubpass.inputAttachmentCount) {
                    currSubpass.subpassFlags &= ~SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_W(
                        "RENDER_VALIDATION: Trying to merge subpasses with input attachments, undefined results");
#endif
                }
                if (prevSubpass.resolveAttachmentCount > currSubpass.resolveAttachmentCount) {
                    currSubpass.subpassFlags &= ~SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_W("RENDER_VALIDATION: Trying to merge subpasses with different resolve counts, "
                                 "undefined results");
#endif
                }
                if ((currSubpass.subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
                    // merge failed -> continue
                    continue;
                }

                mergeCount++;
                auto& currRenderPass = store.renderPasses[idx];
                const auto& currSubpassResourceStates = currRenderPass->subpassResourceStates[idx];
                currRenderPass->subpassStartIndex = currRenderPass->subpassStartIndex - 1U;
                // can merge
                currSubpass.subpassFlags |= SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;

                auto& prevRenderPass = store.renderPasses[prevSubpassIdx];
                auto& prevSubpassResourceStates = prevRenderPass->subpassResourceStates[prevSubpassIdx];
                // NOTE: at the moment copies everything from the current subpass
                CloneData(&prevSubpass, sizeof(RenderPassSubpassDesc), &currSubpass, sizeof(RenderPassSubpassDesc));
                // copy layouts and states from the current to previous
                for (uint32_t resourceIdx = 0U; resourceIdx < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT;
                     ++resourceIdx) {
                    prevSubpassResourceStates.layouts[resourceIdx] = currSubpassResourceStates.layouts[resourceIdx];
                    prevSubpassResourceStates.states[resourceIdx] = currSubpassResourceStates.states[resourceIdx];
                }
            }
        }

        // new minimal subpass count
        subpassCount = subpassCount - mergeCount;
        firstRenderPass->renderPassDesc.subpassCount = subpassCount;
        firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), subpassCount };
        // update subpass start indices
        uint32_t subpassStartIndex = 0;
        for (uint32_t idx = 1U; idx < renderPassCount; ++idx) {
            auto& currRenderPass = store.renderPasses[idx];
            if (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
                currRenderPass->subpassStartIndex = subpassStartIndex;
            } else {
                subpassStartIndex++;
            }
        }
    }

    // copy from the first render pass to the following render passes
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        // subpass start index is the only changing variable
        auto& currRenderPass = store.renderPasses[idx];
        const uint32_t subpassStartIndex = currRenderPass->subpassStartIndex;
        currRenderPass->renderPassDesc = firstRenderPass->renderPassDesc;
        // advance subpass start index if not merging
        if (mergeSubpasses &&
            ((idx < currRenderPass->subpasses.size()) &&
                (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT))) {
            // NOTE: subpassResourceStates are copied in this case
            currRenderPass->subpassResourceStates[subpassStartIndex] =
                firstRenderPass->subpassResourceStates[subpassStartIndex];
        }
        currRenderPass->subpassStartIndex = subpassStartIndex;
        // copy all subpasses and input resource states
        currRenderPass->subpasses = firstRenderPass->subpasses;
        currRenderPass->inputResourceStates = firstRenderPass->inputResourceStates;
        // image layouts need to match
        currRenderPass->imageLayouts = firstRenderPass->imageLayouts;
        // NOTE: subpassResourceStates are only copied when merging
    }
}

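// The helpers below build the source (release) and destination (acquire) halves of a
// resource barrier from a tracked GpuResourceState: src barriers add a top-of-pipe stage
// bit, dst barriers a bottom-of-pipe stage bit. The mip-aware variants narrow the barrier
// to a single mip level when a specific level is targeted.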
ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout srcImageLayout = src.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
        PLUGIN_ASSERT(additionalImageState.layouts);
        srcImageLayout = additionalImageState.layouts[mipLevel];
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        srcImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetDstImageBarrierMips(
    const GpuResourceState& state, const BindableImage& src, const BindableImage& dst)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout dstImageLayout = dst.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        dstImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

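// Updates the per-mip layout tracking of an image that carries additional (per-mip) state:
// a single targeted mip level gets the new layout, otherwise all tracked mips do.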
void ModifyAdditionalImageState(
    const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
    PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
#endif
    if (additionalStateRef.layouts) {
        if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
            (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
            additionalStateRef.layouts[res.mip] = res.imageLayout;
        } else {
            // set layout for all mips
            for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
                additionalStateRef.layouts[idx] = res.imageLayout;
            }
        }
    } else {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
#endif
    }
}

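// Builds the release/acquire barrier pair used to transfer resource ownership
// from one GPU queue to another, with the given source and destination image layouts.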
CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
    const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
{
    return {
        handle,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            srcImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        srcGpuQueue,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            dstImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        dstGpuQueue,
    };
}

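// Patches queue ownership transfers for the current node: the release barrier is added to
// the releasing node's last barrier point and the acquire barrier to the acquiring node's
// first barrier point, and both command lists are flagged as having valid release/acquire
// barriers.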
void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
    array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
{
    for (const auto& transferRef : currNodeGpuResourceTransfers) {
        PLUGIN_ASSERT(transferRef.acquireNodeIdx < (uint32_t)frameRenderNodeContextData.size());
        if (transferRef.acquireNodeIdx >= frameRenderNodeContextData.size()) {
            // skip
            continue;
        }

        auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
        const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
        GpuQueue releaseGpuQueue = acquireGpuQueue;

        if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
        }

        const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
            acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);

        // release ownership (NOTE: not done for previous frame)
        if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
            const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
        // acquire ownership
        {
            const uint32_t rcIndex = 0;
            const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
    }
}

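// A barrier is needed only for dynamic resources that have not already been
// handled by a user-defined custom barrier.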
bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
    const uint32_t customBarrierCount, const RenderHandle handle)
{
    bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
    if ((customBarrierCount > 0) && needsBarrier) {
        needsBarrier = (handledCustomBarriers.count(handle) == 0);
    }
    return needsBarrier;
}
} // namespace

RenderGraph::RenderGraph(Device& device)
    : device_(device), gpuResourceMgr_((GpuResourceManager&)device.GetGpuResourceManager())
{}

void RenderGraph::BeginFrame()
{
    stateCache_.multiRenderPassStore.renderPasses.clear();
    stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
    stateCache_.multiRenderPassStore.supportOpen = false;
    stateCache_.nodeCounter = 0u;
    stateCache_.checkForBackbufferDependency = false;
    stateCache_.usesSwapchainImage = false;
}

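// Processes the render node graphs for the frame: releases tracking slots of destroyed
// resources, resizes the tracking arrays to the current handle counts, walks all node
// stores, and finally stores buffer and image states for the next frame.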
void RenderGraph::ProcessRenderNodeGraph(
    const bool checkBackbufferDependency, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
    stateCache_.checkForBackbufferDependency = checkBackbufferDependency;

    // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
    // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
    // handles:
    // gpu images in descriptor sets, render passes, blits, and custom barriers
    // gpu buffers in descriptor sets, and custom barriers

    {
        // remove resources that will not be tracked anymore and release available slots
        const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
        for (const auto& handle : stateResetData.resources) {
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
            const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
            if ((handleType == RenderHandleType::GPU_IMAGE) &&
                (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
                if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
                    gpuImageTracking_[dataIdx] = {}; // reset
                    gpuImageAvailableIndices_.push_back(dataIdx);
                }
                gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
                if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
                    gpuBufferTracking_[dataIdx] = {}; // reset
                    gpuBufferAvailableIndices_.push_back(dataIdx);
                }
                gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            }
        }
    }

    gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
    gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
                  CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
        static uint64_t debugFrame = 0;
        debugFrame++;
        PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
    }
#endif

    // some resource states need to be stored in undefined state for the frame (i.e. reset on frame boundaries)
    ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);

    // store final state for next frame
    StoreFinalBufferState();
    StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
}

RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
{
    return swapchainStates_;
}

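// Walks every render node in every node store: resets per-node swapchain flags, validates
// multi-render-pass stitching state, processes the node's command list, and patches any
// pending queue ownership transfers.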
void RenderGraph::ProcessRenderNodeGraphNodeStores(
    const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
{
    for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(graphStore);
        if (!graphStore) {
            continue;
        }

        for (uint32_t nodeIdx = 0; nodeIdx < (uint32_t)graphStore->renderNodeContextData.size(); ++nodeIdx) {
            auto& ref = graphStore->renderNodeContextData[nodeIdx];
            ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
            stateCache.usesSwapchainImage = false;                // reset

#if (RENDER_DEV_ENABLED == 1)
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
                PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
            }
#endif

            if (stateCache.multiRenderPassStore.supportOpen && (stateCache.multiRenderPassStore.renderPasses.empty())) {
                PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
                // NOTE: add more error handling and invalidate render command lists
            }
            stateCache.multiRenderPassStore.supportOpen = ref.renderCommandList->HasMultiRenderCommandListSubpasses();
            array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
            // go through commands that affect or need transitions and barriers
            ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);

            // needs backbuffer/swapchain wait
            if (stateCache.usesSwapchainImage) {
                ref.submitInfo.waitForSwapchainAcquireSignal = true;
            }

            // patch gpu resource queue transfers
            if (!currNodeGpuResourceTransfers_.empty()) {
                PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
                // clear for next use
                currNodeGpuResourceTransfers_.clear();
            }

            stateCache_.nodeCounter++;
        }
    }
}

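// Dispatches the commands that affect resource transitions: barrier points and render pass
// begin/end are handled here; everything else is covered by the preceding barrier point.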
void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
    const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
{
    for (uint32_t listIdx = 0; listIdx < (uint32_t)cmdListRef.size(); ++listIdx) {
        auto& cmdRef = cmdListRef[listIdx];

#if (RENDER_DEV_ENABLED == 1)
        if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
            DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
        }
#endif

        // most of the commands are handled within BarrierPoint
        switch (cmdRef.type) {
            case RenderCommandType::BARRIER_POINT:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::BEGIN_RENDER_PASS:
                RenderCommand(
                    nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::END_RENDER_PASS:
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::NEXT_SUBPASS:
            case RenderCommandType::DRAW:
            case RenderCommandType::DRAW_INDIRECT:
            case RenderCommandType::DISPATCH:
            case RenderCommandType::DISPATCH_INDIRECT:
            case RenderCommandType::BIND_PIPELINE:
            case RenderCommandType::BIND_VERTEX_BUFFERS:
            case RenderCommandType::BIND_INDEX_BUFFER:
            case RenderCommandType::COPY_BUFFER:
            case RenderCommandType::COPY_BUFFER_IMAGE:
            case RenderCommandType::COPY_IMAGE:
            case RenderCommandType::BIND_DESCRIPTOR_SETS:
            case RenderCommandType::PUSH_CONSTANT:
            case RenderCommandType::BLIT_IMAGE:
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
            case RenderCommandType::CLEAR_COLOR_IMAGE:
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
            case RenderCommandType::DYNAMIC_STATE_SCISSOR:
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
            case RenderCommandType::DYNAMIC_STATE_STENCIL:
            case RenderCommandType::WRITE_TIMESTAMP:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            case RenderCommandType::UNDEFINED:
            default: {
                // nop
                break;
            }
        }
    } // end command for
}

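// Stores end-of-frame buffer states: non-dynamic resources drop out of tracking, resources
// flagged to reset on frame borders are cleared (handle preserved), and per-frame patching
// state is reset for all buffers.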
void RenderGraph::StoreFinalBufferState()
{
    for (auto& ref : gpuBufferTracking_) {
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // NOTE: we cannot soft reset here
        // if we did, some buffer usage might overlap in the next frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            // reset, but keep the handle, because the gpuBufferTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
        }

        // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
        // frames)
        ref.prevRenderNodeIndex = { ~0u };
    }
}

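// Stores end-of-frame image states: patches swapchain images to the present layout when
// last used in a render pass, records used swapchain states for the backend, and resets
// tracking of frame-border resources while preserving handles and per-mip layout storage.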
void RenderGraph::StoreFinalImageState()
{
    swapchainStates_ = {}; // reset

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        PLUGIN_LOG_I("end_frame image_state:");
    }
#endif
    for (auto& ref : gpuImageTracking_) {
        // if the resource is not dynamic, we do not track it
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // handle automatic presentation layout
        if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
            if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
                RenderCommandBeginRenderPass& beginRenderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
                PatchRenderPassFinalLayout(
                    ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
            }
            // NOTE: currently the automatic presentation layout is handled in the vulkan backend if not in a render pass
            // store final state for backbuffer
            // currently we only store swapchains if they are actually in use this frame
            const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
            if (flags != 0) {
                swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
            }
        }
#if (RENDER_DEV_ENABLED == 1)
        // print before reset for next frame
        if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
            DebugPrintImageState(gpuResourceMgr_, ref);
        }
#endif
        // shallow resources are not tracked
        // they are always in undefined state at the beginning of the frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
            // reset, but keep the handle, because the gpuImageTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
            if (addMips) {
                PLUGIN_ASSERT(!ref.additionalState.layouts);
                ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }
        // NOTE: render pass compatibility hashing with stages and access flags
        // creates quite a few new graphics pipelines in the first frames.
        // An else branch with a soft reset here could retain access flags from the previous frame;
        // to get this to work, the flags would need to be carried from the end of the frame to the beginning as well.

        // need to reset per frame variables for all images (so we do not try to patch from previous frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

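// Handles a BeginRenderPass command: updates tracked image states and layouts for every
// attachment and subpass, and stitches multi-command-list render passes together when needed.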
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
{
    // update layouts for attachments to gpu image state
    BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };

    PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);

    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) { // stitch render pass subpasses
        BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
    }

    const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();

    auto finalImageLayouts =
        array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));

    BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);

    for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
        const auto& subpassRef = rc.subpasses[subpassIdx];
        const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts);

        if (subpassRef.depthAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(
                array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
                subpassResourceStatesRef, finalImageLayouts);
            if (subpassRef.depthResolveAttachmentCount == 1) {
                BeginRenderPassUpdateSubpassImageStates(
                    array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
                    rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
            }
        }
        if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
                                                        subpassRef.fragmentShadingRateAttachmentCount),
                rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
        }
    }

    if (hasRenderPassDependency) { // stitch render pass subpasses
        if (rc.subpassStartIndex > 0) {
            // stitched to behave as a nextSubpass() and not beginRenderPass()
            rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
        }
        const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
        if (finalSubpass) {
            UpdateMultiRenderCommandListRenderPasses(device_, stateCache.multiRenderPassStore);
            // multiRenderPassStore cleared in EndRenderPass
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
    }
#endif
}

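// Registers a stitched render pass in the multi render pass store and, for the first
// subpass, records the barrier list of the preceding barrier point.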
void RenderGraph::BeginRenderPassHandleDependency(
    BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
{
    params.stateCache.multiRenderPassStore.renderPasses.push_back(&params.rc);
    // store the first begin render pass
    params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
        params.stateCache.multiRenderPassStore.renderPasses[0] };

    if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
#ifndef NDEBUG
        // barrier point must be previous command
        PLUGIN_ASSERT(commandListCommandIndex >= 1);
        const uint32_t prevCommandIndex = commandListCommandIndex - 1;
        const RenderCommandWithType& barrierPointCmdRef =
            nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
        PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
        PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));
#endif
        params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
    }
}

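// Updates tracked image state for every attachment of a render pass: resolves the current
// layout (per mip if needed), downgrades load ops on undefined layouts, records input
// resource states, and flags swapchain usage.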
void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
    array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
{
    auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
    const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
    auto& attachments = params.rc.renderPassDesc.attachments;
    auto& attachmentInputResourceStates = params.rc.inputResourceStates;

    for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
        const RenderHandle handle = attachmentHandles[attachmentIdx];
        // NOTE: invalidate invalid handle commands already in render command list
        if (!RenderHandleUtil::IsGpuImage(handle)) {
#ifdef _DEBUG
            PLUGIN_LOG_E("invalid handle in render node graph");
#endif
            continue;
        }
        auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
        ImageLayout imgLayout = stateRef.resource.imageLayout;

        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
        // image layout is undefined if automatic barriers have been disabled
        if (params.rc.enableAutomaticLayoutChanges) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
            if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
                if (stateRef.additionalState.layouts) {
                    imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
                } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
#endif
                }
            }

            initialImageLayouts[attachmentIdx] = imgLayout;
        }
        // undefined layout with load_op_load -> modify to dont_care (and remove the validation warning)
        if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
            (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
            // dont care (user needs to be sure what is wanted, i.e. in the first frame one should clear)
            attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
        }
        finalImageLayouts[attachmentIdx] = imgLayout;
        attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
        attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;

        // store render pass for final layout patching
        stateRef.prevRc = params.rpForCmdRef;
        stateRef.prevRenderNodeIndex = renderNodeIndex;

        // flag for backbuffer use
        if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
            params.stateCache.usesSwapchainImage = true;
        }
    }
}

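// Propagates per-subpass resource states and layouts of the referenced attachments into the
// tracked image states (including per-mip state for images carrying additional state).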
void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attachmentIndices,
    const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
    array_view<ImageLayout> finalImageLayouts)
{
    for (const uint32_t attachmentIndex : attachmentIndices) {
        // NOTE: invalid commands are already handled (and draws etc. invalidated) in the render command list
        PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
        const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
        const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
        const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
        // NOTE: non-dynamic resources and the GENERAL layout should be supported as well

        finalImageLayouts[attachmentIndex] = refImgLayout;
        auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);

        ref.state = refState;
        ref.resource.handle = handle;
        ref.resource.imageLayout = refImgLayout;
        if (addMips) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
            const BindableImage image {
                handle,
                attachmentDesc.mipLevel,
                attachmentDesc.layer,
                refImgLayout,
                RenderHandle {},
            };
            ModifyAdditionalImageState(image, ref.additionalState);
        }
    }
}

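// Ends either a subpass or the whole render pass. When multiple render nodes contribute subpasses
// to the same render pass, only the final contributing subpass ends the render pass and the shared
// multi render pass state is reset.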
void RenderGraph::RenderCommand(RenderCommandEndRenderPass& rc, StateCache& stateCache)
{
    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) {
        const bool finalSubpass = (rc.subpassCount == (uint32_t)stateCache.multiRenderPassStore.renderPasses.size());
        if (finalSubpass) {
            if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
                PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
                // NOTE: add more error handling and invalidate render command lists
            }
            rc.endType = RenderPassEndType::END_RENDER_PASS;
            stateCache.multiRenderPassStore.renderPasses.clear();
            stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
            stateCache.multiRenderPassStore.supportOpen = false;
        } else {
            rc.endType = RenderPassEndType::END_SUBPASS;
        }
    }
}

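// Processes a barrier point: collects all barriers needed before the next render command. Custom
// barriers are handled first, then vertex/index and indirect argument buffers, and finally the
// resources of the upcoming command itself (attachments, copy sources/destinations, or descriptor
// sets). The combined barriers are recorded into the barrier list of this barrier point.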
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
{
    // go through the required resources for the current upcoming event
    const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
    const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
    const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
    const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;

    parameterCachePools_.combinedBarriers.clear();
    parameterCachePools_.handledCustomBarriers.clear();
    ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
        rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
        nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
    // first check custom barriers
    if (parameters.customBarrierCount > 0) {
        HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
    }
    // then vertex / index buffer barriers in the barrier point before the render pass
    if (parameters.vertexInputBarrierCount > 0) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
    }
    if (parameters.indirectBufferBarrierCount > 0U) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
    }

    // at a barrier point the next render command, for which the barrier is needed, is already known
    if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
        HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
        HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
        HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
        HandleCopyBufferImage(
            parameters, commandListCommandIndex, cmdListRef); // NOTE: the same handler covers image to image copies
    } else if (rc.renderCommandType == RenderCommandType::BUILD_ACCELERATION_STRUCTURE) {
        HandleBuildAccelerationStructure(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES) {
        HandleCopyAccelerationStructureInstances(parameters, commandListCommandIndex, cmdListRef);
    } else {
        if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
            HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
        } else if (rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS) {
            // additional render pass attachment barriers
            HandleRenderPassImage(parameters, commandListCommandIndex, cmdListRef);
        }
        const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
        const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
        const uint32_t descriptorSetHandleMaxIndex =
            Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
        const auto descriptorSetHandlesForBarriers =
            array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
                allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
        HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
    }

    if (!parameters.combinedBarriers.empty()) {
        // use the first render pass barrier point with the following subpasses
        // firstRenderPassBarrierList is null for the first subpass
        const bool renderPassHasDependency = stateCache.multiRenderPassStore.supportOpen;
        if (renderPassHasDependency && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
            PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
            stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
                rc.barrierPointIndex, parameters.combinedBarriers);
        } else {
            nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
    }
#endif
}

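// The two helpers below overwrite the tracked state of a buffer/image with the destination state of
// a handled custom barrier, so that later automatic barrier generation continues from the correct state.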
inline void RenderGraph::UpdateBufferResourceState(
    RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

inline void RenderGraph::UpdateImageResourceState(
    RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

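// Copies the user-provided custom barriers into the combined barrier list. For dynamically tracked
// resources the tracked state is updated to the barrier's destination state; if the source layout
// was left as CORE_IMAGE_LAYOUT_MAX_ENUM, the source side is filled in from the tracked state (with
// per-mip layouts when the resource tracks additional mip state). Handled resources are remembered
// so that automatic barrier generation can skip them later in this barrier point.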
void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const CommandBarrier>& customBarrierListRef)
{
    params.handledCustomBarriers.reserve(params.customBarrierCount);
    PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
    for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
              end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
         begin != end; ++begin) {
        // add a copy and modify if needed
        auto& cb = params.combinedBarriers.emplace_back(*begin);

        // NOTE: undefined type is for non-resource memory/pipeline barriers
        const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
        const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
        PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
                      (type == RenderHandleType::GPU_IMAGE));
        if (type == RenderHandleType::GPU_BUFFER) {
            if (isDynamicTrack) {
                auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
                UpdateBufferResourceState(stateRef, params, cb);
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        } else if (type == RenderHandleType::GPU_IMAGE) {
            if (isDynamicTrack) {
                const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
                auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
                if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
                    uint32_t mipLevel = 0U;
                    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
                    ImageLayout srcImageLayout = stateRef.resource.imageLayout;
                    if (isAddMips) {
                        const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
                        const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
                        if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
                            (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
                            if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
                                mipLevel = dstMip;
                                mipCount = 1U;
                            } else {
                                mipLevel = srcMip;
                                // all mip levels
                            }
                            if (stateRef.additionalState.layouts) {
                                srcImageLayout = stateRef.additionalState.layouts[mipLevel];
                            } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                                PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
#endif
                            }
                        }
                    }
                    cb.src.accessFlags = stateRef.state.accessFlags;
                    cb.src.pipelineStageFlags =
                        stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                    cb.src.optionalImageLayout = srcImageLayout;
                    cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
                        PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
                }
                UpdateImageResourceState(stateRef, params, cb);
                stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
                if (isAddMips) {
                    const BindableImage image {
                        cb.resourceHandle,
                        cb.dst.optionalImageSubresourceRange.baseMipLevel,
                        cb.dst.optionalImageSubresourceRange.baseArrayLayer,
                        cb.dst.optionalImageLayout,
                        RenderHandle {},
                    };
                    ModifyAdditionalImageState(image, stateRef.additionalState);
                }
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        }
    }
}

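// Creates barriers for the vertex and index buffers used by the upcoming render pass; the buffers
// are transitioned to index/vertex attribute read on the vertex input stage.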
void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size()) {
            const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
            const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
            UpdateStateAndCreateBarriersGpuBuffer(
                resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
        }
    }
}

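// Creates barriers for indirect argument buffers read inside the upcoming render pass; buffers
// already covered by a custom barrier are skipped.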
void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)indirectBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)indirectBufferBarrierListRef.size()) {
            const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
            const bool needsArgsBarrier =
                CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
            if (needsArgsBarrier) {
                const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                    CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
                UpdateStateAndCreateBarriersGpuBuffer(
                    resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
            }
        }
    }
}

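// Transitions the attachments of the next render pass to attachment write state (depth/stencil or
// color) unless a custom barrier has already handled the attachment.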
void RenderGraph::HandleRenderPassImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BEGIN_RENDER_PASS);

    const RenderCommandBeginRenderPass& nextRc = *static_cast<RenderCommandBeginRenderPass*>(nextCmdRef.rc);
    // check all attachments
    const RenderPassDesc& rpDesc = nextRc.renderPassDesc;
    for (uint32_t attachIdx = 0U; attachIdx < rpDesc.attachmentCount; ++attachIdx) {
        const RenderHandle handle = rpDesc.attachmentHandles[attachIdx];
        const bool needsBarrier = CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, handle);
        if (needsBarrier) {
            const bool depthImage = RenderHandleUtil::IsDepthImage(handle);
            BindableImage bRes = {};
            bRes.handle = handle;
            bRes.imageLayout = depthImage ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
                                          : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
            const AccessFlags accessFlags =
                depthImage ? CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT : CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
            const PipelineStageFlags pipelineStageFlags = CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState { 0, accessFlags, pipelineStageFlags, params.gpuQueue }, bRes, params.rcWithType,
                params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

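// The following handlers (clear, blit, buffer copy, buffer/image copy) share the same pattern:
// peek the next render command, and for each resource it touches that is not already covered by a
// custom barrier, record a transfer read/write barrier and update the tracked state.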
void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);

    const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);

    const bool needsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
    if (needsBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.handle;
        bRes.imageLayout = nextRc.imageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);

    const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.srcHandle;
        bRes.imageLayout = nextRc.srcImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.dstHandle;
        bRes.imageLayout = nextRc.dstImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);

    const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

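// Handles both COPY_BUFFER_IMAGE and COPY_IMAGE commands: resolves the source and destination
// handles and subresources from the command, then adds transfer read/write barriers depending on
// whether each side is a buffer or an image.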
void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
                  (nextCmdRef.type == RenderCommandType::COPY_IMAGE));

    // NOTE: two different command types are supported
    RenderHandle srcHandle;
    RenderHandle dstHandle;
    ImageSubresourceLayers srcImgLayers;
    ImageSubresourceLayers dstImgLayers;
    if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
        const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
        PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
        dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
    } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
        const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.imageCopy.srcSubresource;
        dstImgLayers = nextRc.imageCopy.dstSubresource;
    }

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
    if (needsSrcBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = srcHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = srcHandle;
            bRes.mip = srcImgLayers.mipLevel;
            bRes.layer = srcImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
    if (needsDstBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = dstHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = dstHandle;
            bRes.mip = dstImgLayers.mipLevel;
            bRes.layer = dstImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

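// Adds barriers for an acceleration structure build: instance input buffers and the source
// structure are transitioned to acceleration structure read, the destination structure to write,
// all on the acceleration structure build stage.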
void RenderGraph::HandleBuildAccelerationStructure(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BUILD_ACCELERATION_STRUCTURE);

    const RenderCommandBuildAccelerationStructure& nextRc =
        *static_cast<RenderCommandBuildAccelerationStructure*>(nextCmdRef.rc);

    for (const auto& instancesRef : nextRc.instancesView) {
        // usually a bottom level structure whose build needs to be finished before it is used here
        const RenderHandle handle = instancesRef.data.handle;
        const bool needsBarrier = CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, handle);
        if (needsBarrier) {
            const BindableBuffer bRes = { handle, 0U, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_READ_BIT,
                    CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const auto& geometry = nextRc.geometry;

    // NOTE: mostly empty at the moment
    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, geometry.srcAccelerationStructure);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { geometry.srcAccelerationStructure, 0U,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_READ_BIT,
                CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, geometry.dstAccelerationStructure);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { geometry.dstAccelerationStructure, 0U,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT,
                CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

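// Adds read barriers for the source acceleration structures of an instance copy; the destination
// is written on the CPU (see the note below) and therefore needs no GPU barrier.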
void RenderGraph::HandleCopyAccelerationStructureInstances(ParameterCache& params,
    const uint32_t& commandListCommandIndex, const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES);

    const RenderCommandCopyAccelerationStructureInstances& nextRc =
        *static_cast<RenderCommandCopyAccelerationStructureInstances*>(nextCmdRef.rc);

    // NOTE: nextRc.destination.handle will be copied on the CPU, no barriers needed

    for (const auto& instancesRef : nextRc.instancesView) {
        const RenderHandle handle = instancesRef.accelerationStructure;
        const bool needsSrcBarrier =
            CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, handle);
        if (needsSrcBarrier) {
            const BindableBuffer bRes = { handle, 0U, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_READ_BIT,
                    CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

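// Ensures the indirect dispatch argument buffer is readable as indirect command arguments on the
// draw indirect stage before the dispatch executes.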
void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);

    const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);

    const bool needsArgsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
    if (needsArgsBarrier) {
        const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
                CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

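// Walks all descriptor sets bound for the upcoming command and generates barriers for the dynamic
// buffer and image resources they reference. Global descriptor sets are skipped when they are
// known to contain no dynamic barrier resources.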
void RenderGraph::HandleDescriptorSets(ParameterCache& params,
    const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
    const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
{
    for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
        if (RenderHandleUtil::GetHandleType(descriptorSetHandle) != RenderHandleType::DESCRIPTOR_SET) {
            continue;
        }

        // NOTE: for global descriptor sets the render command list does not know whether the set has dynamic resources
        const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(descriptorSetHandle);
        if (additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) {
            if (!nodeDescriptorSetMgrRef.HasDynamicBarrierResources(descriptorSetHandle)) {
                continue;
            }
        }

        const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
        const auto& buffers = bindingResources.buffers;
        const auto& images = bindingResources.images;
        for (const auto& refBuf : buffers) {
            const auto& ref = refBuf.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // the first is the ref; starting from index 1 the array offsets are used
                const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1].desc;
                if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount,
                                        bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
                }
            }
        }
        for (const auto& refImg : images) {
            const auto& ref = refImg.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // the first is the ref; starting from index 1 the array offsets are used
                const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1].desc;
                if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount,
                                        bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
                }
            }
        }
    } // end for
}

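// Updates the tracked state of a dynamic image and emits a barrier when the layout changes or a
// write access is involved on either side. A queue ownership change is recorded as a queue
// transfer instead of a regular barrier; input attachments are left to the render pass handling.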
void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
    const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
        return;
    }

    auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
    // NOTE: previously the final render pass layouts were patched here;
    // ATM only the swapchain image is patched if needed

    const GpuResourceState& prevState = ref.state;
    const BindableImage& prevImage = ref.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
    const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
                                                : GetSrcImageBarrier(prevState, prevImage);

    const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
    // NOTE: the access flags as such are not of interest here; only write access matters
    // (checked for both the previous and the new state)
    const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (state.accessFlags & WRITE_ACCESS_FLAGS);
    const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
    // input attachments are handled with render passes and not with barriers
    if ((layoutChanged || writeTarget) && (!inputAttachment)) {
        if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
            (prevState.gpuQueue.type != state.gpuQueue.type)) {
            PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);

            PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
            currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
                res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
        } else {
            const ResourceBarrier dstImageBarrier =
                addMips ? GetDstImageBarrierMips(state, prevImage, res) : GetDstImageBarrier(state, res);
            params.combinedBarriers.push_back(
                CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
        }

        ref.state = state;
        ref.resource = res;
        ref.prevRc = params.rcWithType;
        ref.prevRenderNodeIndex = params.renderNodeIndex;
        if (addMips) {
            ModifyAdditionalImageState(res, ref.additionalState);
        }
    }
}

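// Buffer counterpart of the above: emits a barrier only when the previous or the new state
// contains a write access, and always updates the cached state to the post-barrier situation.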
void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
    const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
        return;
    }

    // get the current state of the buffer
    auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
    const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
    // if the previous or the current state is a write -> barrier
    if ((prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (dstState.accessFlags & WRITE_ACCESS_FLAGS)) {
        params.combinedBarriers.push_back(CommandBarrier {
            res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
    }

    // update the cached state to match the situation after the barrier
    srcStateRef.state = dstState;
    srcStateRef.resource = res;
    srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

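// Records a barrier (or a queue ownership transfer when the queue type changes) for a buffer and
// caches the new state for subsequent barrier evaluation.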
void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer, vector<CommandBarrier>& barriers,
    vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableBuffer srcBuffer = stateRef.resource;

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(
            RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
                ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
    } else {
        const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
        const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);

        barriers.push_back(CommandBarrier {
            newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newBuffer;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
}

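// Image counterpart: in addition to the barrier or queue transfer, stores the previous render
// command for later final layout patching and updates the per-mip layout state when the image
// tracks additional mip states.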
void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
    vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    // newGpuResourceState carries the queue transfer image layout in the old optionalImageLayout

    auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableImage srcImage = stateRef.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
            stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
    } else {
        const ResourceBarrier srcBarrier =
            addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
                    : GetSrcImageBarrier(srcState, srcImage);
        const ResourceBarrier dstBarrier = addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage)
                                                   : GetDstImageBarrier(newGpuResourceState, newImage);

        barriers.push_back(CommandBarrier {
            newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newImage;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
    if (addMips) {
        ModifyAdditionalImageState(newImage, stateRef.additionalState);
    }
}

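// The two accessors below lazily allocate a tracking slot for a dynamic resource the first time
// its state is queried; freed slots are reused through the available-index lists. Handles outside
// the tracked range fall back to a default state object.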
RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic-trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
    if (arrayIndex < gpuBufferDataIndices_.size()) {
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuBufferAvailableIndices_.empty()) {
                dataIdx = gpuBufferAvailableIndices_.back();
                gpuBufferAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
                gpuBufferTracking_.emplace_back();
            }
            gpuBufferDataIndices_[arrayIndex] = dataIdx;

            gpuBufferTracking_[dataIdx].resource.handle = handle;
            gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for the default state
        }
        return gpuBufferTracking_[dataIdx];
    }

    return defaultBufferState_;
}

RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic-trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
    if (arrayIndex < gpuImageDataIndices_.size()) {
        // NOTE: render pass attachments are always expected to be dynamic resources
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuImageAvailableIndices_.empty()) {
                dataIdx = gpuImageAvailableIndices_.back();
                gpuImageAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
                gpuImageTracking_.emplace_back();
            }
            gpuImageDataIndices_[arrayIndex] = dataIdx;

            gpuImageTracking_[dataIdx].resource.handle = handle;
            gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for the default state
            if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
                (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
                gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
            (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
            PLUGIN_LOG_ONCE_W("dynamic_state_mips_issue_" + to_string(handle.id),
                "RENDER_VALIDATION: Additional mip states missing (handle:%" PRIx64 ")", handle.id);
        }
#endif
        return gpuImageTracking_[dataIdx];
    }

    PLUGIN_LOG_ONCE_W("render_graph_image_state_issues", "RenderGraph: Image tracking issue with handle count");
    return defaultImageState_;
}
RENDER_END_NAMESPACE()