/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_graph.h"

#include <cinttypes>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/math/mathf.h>
#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_resource_cache.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
constexpr uint32_t INVALID_TRACK_IDX { ~0u };

#if (RENDER_DEV_ENABLED == 1)
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;

void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
{
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
        switch (rc.type) {
            case RenderCommandType::DRAW: {
                PLUGIN_LOG_I("rc: Draw");
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                PLUGIN_LOG_I("rc: DrawIndirect");
                break;
            }
            case RenderCommandType::DISPATCH: {
                PLUGIN_LOG_I("rc: Dispatch");
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                PLUGIN_LOG_I("rc: DispatchIndirect");
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                PLUGIN_LOG_I("rc: BindPipeline");
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                PLUGIN_LOG_I("rc: BeginRenderPass");
                if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
                    const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
                    for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
                        const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
                        PLUGIN_LOG_I("    attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
                    }
                    PLUGIN_LOG_I("    subpass count: %u, subpass start idx: %u",
                        (uint32_t)beginRenderPass.renderPassDesc.subpassCount, beginRenderPass.subpassStartIndex);
                }
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                PLUGIN_LOG_I("rc: NextSubpass");
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                PLUGIN_LOG_I("rc: EndRenderPass");
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                PLUGIN_LOG_I("rc: BindVertexBuffers");
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                PLUGIN_LOG_I("rc: BindIndexBuffer");
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                PLUGIN_LOG_I("rc: CopyBuffer");
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                PLUGIN_LOG_I("rc: CopyBufferImage");
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                PLUGIN_LOG_I("rc: CopyImage");
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                PLUGIN_LOG_I("rc: BlitImage");
                break;
            }
            case RenderCommandType::BARRIER_POINT: {
                PLUGIN_LOG_I("rc: BarrierPoint");
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                PLUGIN_LOG_I("rc: BindDescriptorSets");
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                PLUGIN_LOG_I("rc: PushConstant");
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                PLUGIN_LOG_I("rc: BuildAccelerationStructure");
                break;
            }
            case RenderCommandType::CLEAR_COLOR_IMAGE: {
                PLUGIN_LOG_I("rc: ClearColorImage");
                break;
            }

            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                PLUGIN_LOG_I("rc: DynamicStateViewport");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                PLUGIN_LOG_I("rc: DynamicStateScissor");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
                PLUGIN_LOG_I("rc: DynamicStateLineWidth");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
                PLUGIN_LOG_I("rc: DynamicStateDepthBias");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
                PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
                PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_STENCIL: {
                PLUGIN_LOG_I("rc: DynamicStateStencil");
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
                PLUGIN_LOG_I("rc: DynamicStateFragmentShadingRate");
                break;
            }
            case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
                PLUGIN_LOG_I("rc: ExecuteBackendFramePosition");
                break;
            }

            case RenderCommandType::WRITE_TIMESTAMP: {
                PLUGIN_LOG_I("rc: WriteTimestamp");
                break;
            }
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
                PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
                break;
            }
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
                PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
                break;
            }
            case RenderCommandType::BEGIN_DEBUG_MARKER: {
                PLUGIN_LOG_I("rc: BeginDebugMarker");
                break;
            }
            case RenderCommandType::END_DEBUG_MARKER: {
                PLUGIN_LOG_I("rc: EndDebugMarker");
                break;
            }
            case RenderCommandType::UNDEFINED:
            case RenderCommandType::COUNT: {
                PLUGIN_ASSERT(false && "non-valid render command");
                break;
            }
        }
    }
}

void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
{
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        for (const auto& ref : combinedBarriers) {
            const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
            if (type == RenderHandleType::GPU_BUFFER) {
                PLUGIN_LOG_I("barrier buffer    :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
                    ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
                    ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags);
            } else {
                PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
                PLUGIN_LOG_I("barrier image     :: handle:0x%" PRIx64
                             " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
                    ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
                    ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags, ref.src.optionalImageLayout,
                    ref.dst.optionalImageLayout);
            }
        }
    }
}

void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
{
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
            const auto handle = rc.renderPassDesc.attachmentHandles[idx];
            const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
            const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
            PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64
                         " name:%s, src_layout:%u dst_layout:%u (patched later)",
                handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
        }
    }
}

void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
{
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        // NOTE: gpuHandle might be the same when generation index wraps around
        // and when using shallow handles (shadow -> re-use normal -> shadow -> re-use normal etc)
        const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
        PLUGIN_LOG_I("image_state   :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
            resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
            resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
            RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
            RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
        // one could fetch and print vulkan handle here as well e.g.
        // 1. const GpuImagePlatformDataVk& plat =
        // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
        // 3. PLUGIN_LOG_I("end_frame image   :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
    }
}
#endif // RENDER_DEV_ENABLED

constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                        CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                        CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
                                        CORE_ACCESS_MEMORY_WRITE_BIT;
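
// Illustrative sketch (not part of the original source): a combined write mask
// like the one above is typically used to detect hazards where a previous access
// wrote the resource, which requires a memory dependency and not merely an
// execution dependency, e.g.
// 1. const bool prevWrite = (prevState.accessFlags & WRITE_ACCESS_FLAGS) != 0;
// 2. if (prevWrite) { /* emit src/dst access masks in the barrier */ }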

void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
    RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
{
    const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
    for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
        if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
            beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
            storeState.resource.imageLayout = imageLayout;
        }
    }
}

void UpdateMultiRenderCommandListRenderPasses(Device& device, RenderGraph::MultiRenderPassStore& store)
{
    const auto renderPassCount = (uint32_t)store.renderPasses.size();
    PLUGIN_ASSERT(renderPassCount > 1);

    RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
    PLUGIN_ASSERT(firstRenderPass);
    PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
    const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
    PLUGIN_ASSERT(lastRenderPass);

    const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;

    // take attachment load operations from the first render pass and store operations from the last one
    // take initial layouts from the first render pass and final layouts from the last one (could take the next layout)
    // first build the correct render pass description in the first render pass and then copy it to the others
    // resource states are copied from valid subpasses to the subpasses of the other render command lists
    for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
        const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
        const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
        const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
        for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
            if (fromRpIdx != toRpIdx) {
                auto& toRenderPass = *(store.renderPasses[toRpIdx]);
                auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
                for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
                    toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
                    toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
                }
            }
        }
    }

    for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
        firstRenderPass->renderPassDesc.attachments[idx].storeOp =
            lastRenderPass->renderPassDesc.attachments[idx].storeOp;
        firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
            lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;

        firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
            lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
    }

    // copy subpasses to first and mark if merging subpasses
    bool mergeSubpasses = false;
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
        if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
            mergeSubpasses = true;
        }
    }
    // NOTE: only use merge subpasses in vulkan at the moment
    if (device.GetBackendType() != DeviceBackendType::VULKAN) {
        mergeSubpasses = false;
    }

    uint32_t subpassCount = renderPassCount;
    if (mergeSubpasses) {
        PLUGIN_ASSERT(renderPassCount > 1U);
        // merge from back to front
        const uint32_t finalSubpass = renderPassCount - 1U;
        uint32_t mergeCount = 0U;
        for (uint32_t idx = finalSubpass; idx > 0U; --idx) {
            if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
                PLUGIN_ASSERT(idx > 0U);

                uint32_t prevSubpassIdx = idx - 1U;
                auto& currSubpass = firstRenderPass->subpasses[idx];
                auto& prevSubpass = firstRenderPass->subpasses[prevSubpassIdx];
                // cannot merge in these cases -> clear the merge bit
                if (currSubpass.inputAttachmentCount != prevSubpass.inputAttachmentCount) {
                    currSubpass.subpassFlags &= ~SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_W(
                        "RENDER_VALIDATION: Trying to merge subpasses with input attachments, undefined results");
#endif
                }
                if (prevSubpass.resolveAttachmentCount > currSubpass.resolveAttachmentCount) {
                    currSubpass.subpassFlags &= ~SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_W("RENDER_VALIDATION: Trying to merge subpasses with different resolve counts, "
                                 "undefined results");
#endif
                }
                if ((currSubpass.subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
                    // merge failed -> continue
                    continue;
                }

                mergeCount++;
                auto& currRenderPass = store.renderPasses[idx];
                const auto& currSubpassResourceStates = currRenderPass->subpassResourceStates[idx];
                currRenderPass->subpassStartIndex = currRenderPass->subpassStartIndex - 1U;
                // can merge
                currSubpass.subpassFlags |= SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;

                auto& prevRenderPass = store.renderPasses[prevSubpassIdx];
                auto& prevSubpassResourceStates = prevRenderPass->subpassResourceStates[prevSubpassIdx];
                // NOTE: at the moment copies everything from the current subpass
                CloneData(&prevSubpass, sizeof(RenderPassSubpassDesc), &currSubpass, sizeof(RenderPassSubpassDesc));
                // copy layouts and states from the current to previous
                for (uint32_t resourceIdx = 0U; resourceIdx < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT;
                     ++resourceIdx) {
                    prevSubpassResourceStates.layouts[resourceIdx] = currSubpassResourceStates.layouts[resourceIdx];
                    prevSubpassResourceStates.states[resourceIdx] = currSubpassResourceStates.states[resourceIdx];
                }
            }
        }

        // new minimal subpass count
        subpassCount = subpassCount - mergeCount;
        firstRenderPass->renderPassDesc.subpassCount = subpassCount;
        firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), subpassCount };
        // update subpass start indices
        uint32_t subpassStartIndex = 0;
        for (uint32_t idx = 1U; idx < renderPassCount; ++idx) {
            auto& currRenderPass = store.renderPasses[idx];
            if (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
                currRenderPass->subpassStartIndex = subpassStartIndex;
            } else {
                subpassStartIndex++;
            }
        }
    }

    // copy from first to following render passes
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        // subpass start index is the only changing variable
        auto& currRenderPass = store.renderPasses[idx];
        const uint32_t subpassStartIndex = currRenderPass->subpassStartIndex;
        currRenderPass->renderPassDesc = firstRenderPass->renderPassDesc;
        // when merging, copy the subpass resource states for the new start index
        if (mergeSubpasses &&
            ((idx < currRenderPass->subpasses.size()) &&
                (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT))) {
            // NOTE: subpassResourceStates are copied in this case
            currRenderPass->subpassResourceStates[subpassStartIndex] =
                firstRenderPass->subpassResourceStates[subpassStartIndex];
        }
        currRenderPass->subpassStartIndex = subpassStartIndex;
        // copy all subpasses and input resource states
        currRenderPass->subpasses = firstRenderPass->subpasses;
        currRenderPass->inputResourceStates = firstRenderPass->inputResourceStates;
        // image layouts need to match
        currRenderPass->imageLayouts = firstRenderPass->imageLayouts;
        // NOTE: subpassResourceStates are only copied when doing merging
    }
}
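
// Illustrative example (not part of the original source) of the merge bookkeeping
// above: with three stitched render passes where only subpass 2 carries
// CORE_SUBPASS_MERGE_BIT, subpass 2 is merged into subpass 1, mergeCount == 1,
// the final subpassCount becomes 2, and the per-render-pass subpassStartIndex
// values end up as { 0, 1, 1 }.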

ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout srcImageLayout = src.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
        PLUGIN_ASSERT(additionalImageState.layouts);
        srcImageLayout = additionalImageState.layouts[mipLevel];
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        srcImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetDstImageBarrierMips(
    const GpuResourceState& state, const BindableImage& src, const BindableImage& dst)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout dstImageLayout = dst.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        dstImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}
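
// Illustrative note (not from the original source) on the mip variants above: for
// a blit from mip 0 to mip 1 of the same image, dst.mip (1) is selected with
// mipCount 1, so only that level is transitioned; when dst.mip is
// GPU_IMAGE_ALL_MIP_LEVELS, the code falls back to src.mip and keeps mipCount as
// GPU_IMAGE_ALL_MIP_LEVELS so the whole mip chain is covered.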

void ModifyAdditionalImageState(
    const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
    PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
#endif
    if (additionalStateRef.layouts) {
        if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
            (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
            additionalStateRef.layouts[res.mip] = res.imageLayout;
        } else {
            // set layout for all mips
            for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
                additionalStateRef.layouts[idx] = res.imageLayout;
            }
        }
    } else {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
#endif
    }
}
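
// Illustrative usage sketch with hypothetical values (not part of the original
// source): after rendering to mip 2 of an image created with additional
// (per-mip) state tracking, e.g.
// 1. const BindableImage res { handle, 2u, 0u, CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, {} };
// 2. ModifyAdditionalImageState(res, stateRef.additionalState);
// only layouts[2] is updated; with res.mip == GPU_IMAGE_ALL_MIP_LEVELS every
// tracked mip layout would be overwritten instead.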

CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
    const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
{
    return {
        handle,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            srcImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        srcGpuQueue,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            dstImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        dstGpuQueue,
    };
}
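
// Illustrative example (not part of the original source; queue identifiers are
// assumptions): a graphics -> compute queue ownership transfer of an image could
// be expressed roughly as
// 1. GetQueueOwnershipTransferBarrier(handle, { GpuQueue::QueueType::GRAPHICS, 0u },
// 2.     { GpuQueue::QueueType::COMPUTE, 0u }, srcLayout, dstLayout)
// and the same CommandBarrier is then recorded twice: once at the release barrier
// point of the producing node and once at the acquire barrier point of the
// consuming node (see PatchGpuResourceQueueTransfers below).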

void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
    array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
{
    for (const auto& transferRef : currNodeGpuResourceTransfers) {
        PLUGIN_ASSERT(transferRef.acquireNodeIdx < (uint32_t)frameRenderNodeContextData.size());
        if (transferRef.acquireNodeIdx >= frameRenderNodeContextData.size()) {
            // skip
            continue;
        }

        auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
        const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
        GpuQueue releaseGpuQueue = acquireGpuQueue;

        if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
        }

        const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
            acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);

        // release ownership (NOTE: not done for previous frame)
        if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
            const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
        // acquire ownership
        {
            const uint32_t rcIndex = 0;
            const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
    }
}

bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
    const uint32_t customBarrierCount, const RenderHandle handle)
{
    bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
    if ((customBarrierCount > 0) && needsBarrier) {
        needsBarrier = (handledCustomBarriers.count(handle) == 0);
    }
    return needsBarrier;
}
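
// Illustrative example (not part of the original source): a dynamic buffer that a
// render node has already synchronized with a custom barrier is skipped by the
// automatic tracking, e.g. with handledCustomBarriers = { { bufferHandle, 0u } }
// the call CheckForBarrierNeed(handledCustomBarriers, 1u, bufferHandle) returns
// false, while an untracked dynamic handle would return true.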
} // namespace

RenderGraph::RenderGraph(Device& device)
    : device_(device), gpuResourceMgr_((GpuResourceManager&)device.GetGpuResourceManager())
{}

void RenderGraph::BeginFrame()
{
    stateCache_.multiRenderPassStore.renderPasses.clear();
    stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
    stateCache_.multiRenderPassStore.supportOpen = false;
    stateCache_.nodeCounter = 0u;
    stateCache_.checkForBackbufferDependency = false;
    stateCache_.usesSwapchainImage = false;
}

void RenderGraph::ProcessRenderNodeGraph(
    const bool checkBackbufferDependancy, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
    stateCache_.checkForBackbufferDependency = checkBackbufferDependancy;

    // NOTE: gpu buffers and gpu images are tracked separately because image state is larger (layers, mips)
    // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
    // handles:
    // gpu images in descriptor sets, render passes, blits, and custom barriers
    // gpu buffers in descriptor sets, and custom barriers

    {
        // remove resources that will not be tracked anymore and release available slots
        const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
        for (const auto& handle : stateResetData.resources) {
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
            const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
            if ((handleType == RenderHandleType::GPU_IMAGE) &&
                (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
                if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
                    gpuImageTracking_[dataIdx] = {}; // reset
                    gpuImageAvailableIndices_.push_back(dataIdx);
                }
                gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
                if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
                    gpuBufferTracking_[dataIdx] = {}; // reset
                    gpuBufferAvailableIndices_.push_back(dataIdx);
                }
                gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            }
        }
    }

    gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
    gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
                  CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
        static uint64_t debugFrame = 0;
        debugFrame++;
        PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
    }
#endif

    // some resources need their frame state stored as undefined (i.e. reset on frame boundaries)
    ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);

    // store final state for next frame
    StoreFinalBufferState();
    StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
}

RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
{
    return swapchainStates_;
}

void RenderGraph::ProcessRenderNodeGraphNodeStores(
    const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
{
    for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(graphStore);
        if (!graphStore) {
            continue;
        }

        for (uint32_t nodeIdx = 0; nodeIdx < (uint32_t)graphStore->renderNodeContextData.size(); ++nodeIdx) {
            auto& ref = graphStore->renderNodeContextData[nodeIdx];
            ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
            stateCache.usesSwapchainImage = false;                // reset

#if (RENDER_DEV_ENABLED == 1)
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
                PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
            }
#endif

            if (stateCache.multiRenderPassStore.supportOpen && (stateCache.multiRenderPassStore.renderPasses.empty())) {
                PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
                // NOTE: add more error handling and invalidate render command lists
            }
            stateCache.multiRenderPassStore.supportOpen = ref.renderCommandList->HasMultiRenderCommandListSubpasses();
            array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
            // go through commands that affect or need transitions and barriers
            ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);

            // needs backbuffer/swapchain wait
            if (stateCache.usesSwapchainImage) {
                ref.submitInfo.waitForSwapchainAcquireSignal = true;
            }

            // patch gpu resource queue transfers
            if (!currNodeGpuResourceTransfers_.empty()) {
                PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
                // clear for next use
                currNodeGpuResourceTransfers_.clear();
            }

            stateCache_.nodeCounter++;
        }
    }
}

void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
    const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
{
    for (uint32_t listIdx = 0; listIdx < (uint32_t)cmdListRef.size(); ++listIdx) {
        auto& cmdRef = cmdListRef[listIdx];

#if (RENDER_DEV_ENABLED == 1)
        if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
            DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
        }
#endif

        // most of the commands are handled within BarrierPoint
        switch (cmdRef.type) {
            case RenderCommandType::BARRIER_POINT:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::BEGIN_RENDER_PASS:
                RenderCommand(
                    nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::END_RENDER_PASS:
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::NEXT_SUBPASS:
            case RenderCommandType::DRAW:
            case RenderCommandType::DRAW_INDIRECT:
            case RenderCommandType::DISPATCH:
            case RenderCommandType::DISPATCH_INDIRECT:
            case RenderCommandType::BIND_PIPELINE:
            case RenderCommandType::BIND_VERTEX_BUFFERS:
            case RenderCommandType::BIND_INDEX_BUFFER:
            case RenderCommandType::COPY_BUFFER:
            case RenderCommandType::COPY_BUFFER_IMAGE:
            case RenderCommandType::COPY_IMAGE:
            case RenderCommandType::BIND_DESCRIPTOR_SETS:
            case RenderCommandType::PUSH_CONSTANT:
            case RenderCommandType::BLIT_IMAGE:
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
            case RenderCommandType::CLEAR_COLOR_IMAGE:
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
            case RenderCommandType::DYNAMIC_STATE_SCISSOR:
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
            case RenderCommandType::DYNAMIC_STATE_STENCIL:
            case RenderCommandType::WRITE_TIMESTAMP:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            case RenderCommandType::UNDEFINED:
            default: {
                // nop
                break;
            }
        }
    } // end command for
}

void RenderGraph::StoreFinalBufferState()
{
    for (auto& ref : gpuBufferTracking_) {
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // NOTE: we cannot soft reset here
        // if we do so some buffer usage might overlap in the next frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            // reset, but keep the handle, because the gpuBufferTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
        }

        // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
        // frames)
        ref.prevRenderNodeIndex = { ~0u };
    }
}

void RenderGraph::StoreFinalImageState()
{
    swapchainStates_ = {}; // reset

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        PLUGIN_LOG_I("end_frame image_state:");
    }
#endif
    for (auto& ref : gpuImageTracking_) {
        // if the resource is not dynamic, we do not track it
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // handle automatic presentation layout
        if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
            if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
                RenderCommandBeginRenderPass& beginRenderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
                PatchRenderPassFinalLayout(
                    ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
            }
            // NOTE: currently we handle automatic presentation layout in vulkan backend if not in render pass
            // store final state for backbuffer
            // swapchain state is only stored if the swapchain was actually used this frame
            const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
            if (flags != 0) {
                swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
            }
        }
#if (RENDER_DEV_ENABLED == 1)
        // print before reset for next frame
        if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
            DebugPrintImageState(gpuResourceMgr_, ref);
        }
#endif
        // shallow resources are not tracked
        // they are always in undefined state at the beginning of the frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
            // reset, but keep the handle, because the gpuImageTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
            if (addMips) {
                PLUGIN_ASSERT(!ref.additionalState.layouts);
                ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        } else {
            // NOTE: render pass compatibility hashing with stages and access flags
            // creates quite many new graphics pipelines in the first few frames
            // do soft reset here to prevent access flags from previous frame
            // NOTE: in theory this soft reset might create overlap of rendering to a same target
            ref.state.accessFlags = 0;
            ref.state.pipelineStageFlags = 0;
            ref.state.shaderStageFlags = 0;
        }

        // need to reset per frame variables for all images (so we do not try to patch from previous frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
{
    // update layouts for attachments to gpu image state
    BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };

    PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);

    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) { // stitch render pass subpasses
        BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
    }

    const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();

    auto finalImageLayouts =
        array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));

    BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);

    for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
        const auto& subpassRef = rc.subpasses[subpassIdx];
        const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts);

        if (subpassRef.depthAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(
                array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
                subpassResourceStatesRef, finalImageLayouts);
            if (subpassRef.depthResolveAttachmentCount == 1) {
                BeginRenderPassUpdateSubpassImageStates(
                    array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
                    rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
            }
        }
        if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
                                                        subpassRef.fragmentShadingRateAttachmentCount),
                rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
        }
    }

    if (hasRenderPassDependency) { // stitch render pass subpasses
        if (rc.subpassStartIndex > 0) {
            // stitched to behave as a nextSubpass() and not beginRenderPass()
            rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
        }
        const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
        if (finalSubpass) {
            UpdateMultiRenderCommandListRenderPasses(device_, stateCache.multiRenderPassStore);
            // multiRenderPassStore cleared in EndRenderPass
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
    }
#endif
}

void RenderGraph::BeginRenderPassHandleDependency(
    BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
{
    params.stateCache.multiRenderPassStore.renderPasses.push_back(&params.rc);
    // store the first begin render pass
    params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
        params.stateCache.multiRenderPassStore.renderPasses[0] };

    if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
#ifndef NDEBUG
        // barrier point must be previous command
        PLUGIN_ASSERT(commandListCommandIndex >= 1);
        const uint32_t prevCommandIndex = commandListCommandIndex - 1;
        const RenderCommandWithType& barrierPointCmdRef =
            nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
        PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
        PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));
#endif
        params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
    }
}

void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
    array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
{
    auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
    const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
    auto& attachments = params.rc.renderPassDesc.attachments;
    auto& attachmentInputResourceStates = params.rc.inputResourceStates;

    for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
        const RenderHandle handle = attachmentHandles[attachmentIdx];
        // NOTE: invalidate invalid handle commands already in render command list
        if (!RenderHandleUtil::IsGpuImage(handle)) {
#ifdef _DEBUG
            PLUGIN_LOG_E("invalid handle in render node graph");
#endif
            continue;
        }
        auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
        ImageLayout imgLayout = stateRef.resource.imageLayout;

        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
        // image layout is undefined if automatic barriers have been disabled
        if (params.rc.enableAutomaticLayoutChanges) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
            if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
                if (stateRef.additionalState.layouts) {
                    imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
                } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
#endif
                }
            }

            initialImageLayouts[attachmentIdx] = imgLayout;
        }
        // undefined layout with load_op_load -> we modify to dont_care (and remove validation warning)
        if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
            (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
            // dont care (user needs to be sure what is wanted, i.e. in first frame one should clear)
            attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
        }
        finalImageLayouts[attachmentIdx] = imgLayout;
        attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
        attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;

        // store render pass for final layout patching
        stateRef.prevRc = params.rpForCmdRef;
        stateRef.prevRenderNodeIndex = renderNodeIndex;

        // flag for backbuffer use
        if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
            params.stateCache.usesSwapchainImage = true;
        }
    }
}

void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attatchmentIndices,
    const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
    array_view<ImageLayout> finalImageLayouts)
{
    for (const uint32_t attachmentIndex : attatchmentIndices) {
        // NOTE: handle invalid commands already in render command list and invalidate draws etc.
        PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
        const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
        const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
        const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
        // NOTE: we should support non dynamicity and GENERAL

        finalImageLayouts[attachmentIndex] = refImgLayout;
        auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);

        ref.state = refState;
        ref.resource.handle = handle;
        ref.resource.imageLayout = refImgLayout;
        if (addMips) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
            const BindableImage image {
                handle,
                attachmentDesc.mipLevel,
                attachmentDesc.layer,
                refImgLayout,
                RenderHandle {},
            };
            ModifyAdditionalImageState(image, ref.additionalState);
        }
    }
}
1089 
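// Handles an end render pass command; for multi render node render passes,
// decides whether this ends only a subpass or the whole render pass.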
void RenderGraph::RenderCommand(RenderCommandEndRenderPass& rc, StateCache& stateCache)
{
    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) {
        const bool finalSubpass = (rc.subpassCount == (uint32_t)stateCache.multiRenderPassStore.renderPasses.size());
        if (finalSubpass) {
            if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
                PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
                // NOTE: add more error handling and invalidate render command lists
            }
            rc.endType = RenderPassEndType::END_RENDER_PASS;
            stateCache.multiRenderPassStore.renderPasses.clear();
            stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
            stateCache.multiRenderPassStore.supportOpen = false;
        } else {
            rc.endType = RenderPassEndType::END_SUBPASS;
        }
    }
}

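// Processes a barrier point: collects the barriers needed by the next render
// command (custom barriers, vertex/index and indirect buffer barriers, transfer
// commands, or descriptor set resources) and stores them in the barrier list.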
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
{
    // go through the required resources for the upcoming render command
    const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
    const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
    const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
    const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;

    parameterCachePools_.combinedBarriers.clear();
    parameterCachePools_.handledCustomBarriers.clear();
    ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
        rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
        nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
    // first check custom barriers
    if (parameters.customBarrierCount > 0) {
        HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
    }
    // then vertex / index buffer barriers in the barrier point before the render pass
    if (parameters.vertexInputBarrierCount > 0) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
    }
    if (parameters.indirectBufferBarrierCount > 0U) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
    }

    // at a barrier point the next render command, for which the barrier is needed, is known
    if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
        HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
        HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
        HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image
    } else {                                                                    // descriptor sets
        if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
            HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
        }
        const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
        const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
        const uint32_t descriptorSetHandleMaxIndex =
            Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
        const auto descriptorSetHandlesForBarriers =
            array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
                allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
        HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
    }

    if (!parameters.combinedBarriers.empty()) {
        // use the first render pass barrier point with the following subpasses
        // firstRenderPassBarrierList is null for the first subpass
        const bool renderPassHasDependency = stateCache.multiRenderPassStore.supportOpen;
        if (renderPassHasDependency && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
            PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
            stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
                rc.barrierPointIndex, parameters.combinedBarriers);
        } else {
            nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
    }
#endif
}

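// Updates the tracked state of a buffer to the destination state of a custom barrier.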
inline void RenderGraph::UpdateBufferResourceState(
    RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

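// Updates the tracked state of an image to the destination state of a custom barrier.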
inline void RenderGraph::UpdateImageResourceState(
    RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

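// Copies user-given custom barriers to the combined barrier list and, for
// dynamically tracked resources, patches the source state from the tracked
// state and updates the tracking to the barrier destination state.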
void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const CommandBarrier>& customBarrierListRef)
{
    params.handledCustomBarriers.reserve(params.customBarrierCount);
    PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
    for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
              end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
         begin != end; ++begin) {
        // add a copy and modify if needed
        auto& cb = params.combinedBarriers.emplace_back(*begin);

        // NOTE: undefined type is for non-resource memory/pipeline barriers
        const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
        const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
        PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
                      (type == RenderHandleType::GPU_IMAGE));
        if (type == RenderHandleType::GPU_BUFFER) {
            if (isDynamicTrack) {
                auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
                UpdateBufferResourceState(stateRef, params, cb);
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        } else if (type == RenderHandleType::GPU_IMAGE) {
            if (isDynamicTrack) {
                const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
                auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
                if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
                    uint32_t mipLevel = 0U;
                    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
                    ImageLayout srcImageLayout = stateRef.resource.imageLayout;
                    if (isAddMips) {
                        const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
                        const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
                        if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
                            (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
                            if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
                                mipLevel = dstMip;
                                mipCount = 1U;
                            } else {
                                mipLevel = srcMip;
                                // all mip levels
                            }
                            if (stateRef.additionalState.layouts) {
                                srcImageLayout = stateRef.additionalState.layouts[mipLevel];
                            } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                                PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
#endif
                            }
                        }
                    }
                    cb.src.accessFlags = stateRef.state.accessFlags;
                    cb.src.pipelineStageFlags =
                        stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                    cb.src.optionalImageLayout = srcImageLayout;
                    cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
                        PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
                }
                UpdateImageResourceState(stateRef, params, cb);
                stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
                if (isAddMips) {
                    const BindableImage image {
                        cb.resourceHandle,
                        cb.dst.optionalImageSubresourceRange.baseMipLevel,
                        cb.dst.optionalImageSubresourceRange.baseArrayLayer,
                        cb.dst.optionalImageLayout,
                        RenderHandle {},
                    };
                    ModifyAdditionalImageState(image, stateRef.additionalState);
                }
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        }
    }
}

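// Creates barriers for the vertex and index buffers used by the upcoming render pass.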
void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size()) {
            const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
            const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
            UpdateStateAndCreateBarriersGpuBuffer(
                resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
        }
    }
}

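// Creates barriers for the indirect argument buffers used by the upcoming render pass.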
void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)indirectBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)indirectBufferBarrierListRef.size()) {
            const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
            const bool needsArgsBarrier =
                CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
            if (needsArgsBarrier) {
                const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                    CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
                UpdateStateAndCreateBarriersGpuBuffer(
                    resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
            }
        }
    }
}

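// Creates the transfer-write barrier needed by the next clear color image command.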
void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);

    const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);

    const bool needsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
    if (needsBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.handle;
        bRes.imageLayout = nextRc.imageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

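// Creates the transfer-read and transfer-write barriers needed by the next blit image command.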
void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);

    const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.srcHandle;
        bRes.imageLayout = nextRc.srcImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.dstHandle;
        bRes.imageLayout = nextRc.dstImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

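// Creates the transfer-read and transfer-write barriers needed by the next copy buffer command.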
void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);

    const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

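// Creates source and destination barriers for the next buffer-to-image, image-to-buffer,
// or image-to-image copy command; the resources may be buffers or images.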
void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
                  (nextCmdRef.type == RenderCommandType::COPY_IMAGE));

    // NOTE: two different command types supported
    RenderHandle srcHandle;
    RenderHandle dstHandle;
    ImageSubresourceLayers srcImgLayers;
    ImageSubresourceLayers dstImgLayers;
    if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
        const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
        PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
        dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
    } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
        const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.imageCopy.srcSubresource;
        dstImgLayers = nextRc.imageCopy.dstSubresource;
    }

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
    if (needsSrcBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = srcHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = srcHandle;
            bRes.mip = srcImgLayers.mipLevel;
            bRes.layer = srcImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
    if (needsDstBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = dstHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = dstHandle;
            bRes.mip = dstImgLayers.mipLevel;
            bRes.layer = dstImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

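// Creates the indirect-argument-read barrier needed by the next dispatch indirect command.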
void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);

    const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);

    const bool needsArgsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
    if (needsArgsBarrier) {
        const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
                CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

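// Goes through all descriptor sets bound for the upcoming render command and
// creates barriers for the dynamically tracked buffer and image resources.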
void RenderGraph::HandleDescriptorSets(ParameterCache& params,
    const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
    const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
{
    for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
        if (RenderHandleUtil::GetHandleType(descriptorSetHandle) != RenderHandleType::DESCRIPTOR_SET) {
            continue;
        }

        // NOTE: for global descriptor sets the render command list does not know
        // whether the set has dynamic resources, so it is checked here
        const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(descriptorSetHandle);
        if (additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) {
            if (!nodeDescriptorSetMgrRef.HasDynamicBarrierResources(descriptorSetHandle)) {
                continue;
            }
        }

        const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
        const auto& buffers = bindingResources.buffers;
        const auto& images = bindingResources.images;
        for (const auto& refBuf : buffers) {
            const auto& ref = refBuf.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // the first descriptor is the ref; from index 1 onwards the array offset is used
                const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1].desc;
                if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
                }
            }
        }
        for (const auto& refImg : images) {
            const auto& ref = refImg.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // the first descriptor is the ref; from index 1 onwards the array offset is used
                const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1].desc;
                if (CheckForBarrierNeed(
                    params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
                }
            }
        }
    } // end for
}

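// Updates the tracked state of a dynamic image and creates a barrier when the
// layout changes or a write access is involved; cross-queue transitions are
// recorded as queue transfers instead of emitting a barrier.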
void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
    const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
        return;
    }

    auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
    // NOTE: the final render pass layouts were previously patched here
    // ATM: only the swapchain image is patched if needed

    const GpuResourceState& prevState = ref.state;
    const BindableImage& prevImage = ref.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
    const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
                                                : GetSrcImageBarrier(prevState, prevImage);

    const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
    // NOTE: only write access is of interest here (in both prevStateRb.accessFlags and state.accessFlags);
    // read-to-read transitions do not need a barrier
    const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (state.accessFlags & WRITE_ACCESS_FLAGS);
    const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
    // input attachments are handled with render passes and not with barriers
    if ((layoutChanged || writeTarget) && (!inputAttachment)) {
        if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
            (prevState.gpuQueue.type != state.gpuQueue.type)) {
            PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);

            PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
            currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
                res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
        } else {
            const ResourceBarrier dstImageBarrier =
                addMips ? GetDstImageBarrierMips(state, prevImage, res) : GetDstImageBarrier(state, res);
            params.combinedBarriers.push_back(
                CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
        }

        ref.state = state;
        ref.resource = res;
        ref.prevRc = params.rcWithType;
        ref.prevRenderNodeIndex = params.renderNodeIndex;
        if (addMips) {
            ModifyAdditionalImageState(res, ref.additionalState);
        }
    }
}

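// Updates the tracked state of a dynamic buffer and creates a barrier when the
// previous or the new state contains a write access.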
void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
    const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
        return;
    }

    // get the current state of the buffer
    auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
    const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
    // if previous or current state is write -> barrier
    if ((prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (dstState.accessFlags & WRITE_ACCESS_FLAGS)) {
        params.combinedBarriers.push_back(CommandBarrier {
            res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
    }

    // update the cached state to match the situation after the barrier
    srcStateRef.state = dstState;
    srcStateRef.resource = res;
    srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

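// Adds a barrier for a buffer (or records a queue ownership transfer) and
// updates the tracked buffer state to the new state.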
void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer, vector<CommandBarrier>& barriers,
    vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableBuffer srcBuffer = stateRef.resource;

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(
            RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
                ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
    } else {
        const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
        const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);

        barriers.push_back(CommandBarrier {
            newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newBuffer;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
}

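// Adds a barrier for an image (or records a queue ownership transfer) and
// updates the tracked image state, including the per-mip states when needed.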
void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
    vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    // newGpuResourceState carries the queue transfer image layout in the old optionalImageLayout

    auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableImage srcImage = stateRef.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
            stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
    } else {
        const ResourceBarrier srcBarrier =
            addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
                    : GetSrcImageBarrier(srcState, srcImage);
        const ResourceBarrier dstBarrier = addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage)
                                                   : GetDstImageBarrier(newGpuResourceState, newImage);

        barriers.push_back(CommandBarrier {
            newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newImage;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
    if (addMips) {
        ModifyAdditionalImageState(newImage, stateRef.additionalState);
    }
}

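// Returns the tracked state slot for a dynamic buffer, allocating (or reusing)
// a tracking entry on first use; falls back to a default state for untracked handles.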
RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
    if (arrayIndex < gpuBufferDataIndices_.size()) {
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuBufferAvailableIndices_.empty()) {
                dataIdx = gpuBufferAvailableIndices_.back();
                gpuBufferAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
                gpuBufferTracking_.emplace_back();
            }
            gpuBufferDataIndices_[arrayIndex] = dataIdx;

            gpuBufferTracking_[dataIdx].resource.handle = handle;
            gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
        }
        return gpuBufferTracking_[dataIdx];
    }

    return defaultBufferState_;
}

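// Returns the tracked state slot for a dynamic image, allocating (or reusing)
// a tracking entry and per-mip layout storage on first use; falls back to a
// default state for untracked handles.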
RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
    if (arrayIndex < gpuImageDataIndices_.size()) {
        // NOTE: render pass attachments are always expected to be dynamic resources
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuImageAvailableIndices_.empty()) {
                dataIdx = gpuImageAvailableIndices_.back();
                gpuImageAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
                gpuImageTracking_.emplace_back();
            }
            gpuImageDataIndices_[arrayIndex] = dataIdx;

            gpuImageTracking_[dataIdx].resource.handle = handle;
            gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
            if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
                (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
                gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
            (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
            PLUGIN_LOG_ONCE_W("dynamic_state_mips_issue_" + to_string(handle.id),
                "RENDER_VALIDATION: Additional mip states missing (handle:%" PRIx64 ")", handle.id);
        }
#endif
        return gpuImageTracking_[dataIdx];
    }

    PLUGIN_LOG_ONCE_W("render_graph_image_state_issues", "RenderGraph: Image tracking issue with handle count");
    return defaultImageState_;
}
RENDER_END_NAMESPACE()