1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_graph.h"
17
18 #include <cinttypes>
19
20 #include <base/containers/array_view.h>
21 #include <base/containers/fixed_string.h>
22 #include <base/math/mathf.h>
23 #include <render/namespace.h>
24
25 #include "device/device.h"
26 #include "device/gpu_resource_cache.h"
27 #include "device/gpu_resource_handle_util.h"
28 #include "device/gpu_resource_manager.h"
29 #include "nodecontext/render_command_list.h"
30 #include "nodecontext/render_node_graph_node_store.h"
31 #include "util/log.h"
32
33 using namespace BASE_NS;
34
35 RENDER_BEGIN_NAMESPACE()
36 namespace {
37 constexpr uint32_t INVALID_TRACK_IDX { ~0u };
38
39 #if (RENDER_DEV_ENABLED == 1)
40 constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
41 constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
42 constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;
43
DebugPrintCommandListCommand(const RenderCommandWithType & rc,GpuResourceManager & aMgr)44 void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
45 {
46 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
47 switch (rc.type) {
48 case RenderCommandType::DRAW: {
49 PLUGIN_LOG_I("rc: Draw");
50 break;
51 }
52 case RenderCommandType::DRAW_INDIRECT: {
53 PLUGIN_LOG_I("rc: DrawIndirect");
54 break;
55 }
56 case RenderCommandType::DISPATCH: {
57 PLUGIN_LOG_I("rc: Dispatch");
58 break;
59 }
60 case RenderCommandType::DISPATCH_INDIRECT: {
61 PLUGIN_LOG_I("rc: DispatchIndirect");
62 break;
63 }
64 case RenderCommandType::BIND_PIPELINE: {
65 PLUGIN_LOG_I("rc: BindPipeline");
66 break;
67 }
68 case RenderCommandType::BEGIN_RENDER_PASS: {
69 PLUGIN_LOG_I("rc: BeginRenderPass");
70 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
71 const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
72 for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
73 const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
74 PLUGIN_LOG_I(" attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
75 }
76 PLUGIN_LOG_I(" subpass count: %u, subpass start idx: %u",
77 (uint32_t)beginRenderPass.renderPassDesc.subpassCount, beginRenderPass.subpassStartIndex);
78 }
79 break;
80 }
81 case RenderCommandType::NEXT_SUBPASS: {
82 PLUGIN_LOG_I("rc: NextSubpass");
83 break;
84 }
85 case RenderCommandType::END_RENDER_PASS: {
86 PLUGIN_LOG_I("rc: EndRenderPass");
87 break;
88 }
89 case RenderCommandType::BIND_VERTEX_BUFFERS: {
90 PLUGIN_LOG_I("rc: BindVertexBuffers");
91 break;
92 }
93 case RenderCommandType::BIND_INDEX_BUFFER: {
94 PLUGIN_LOG_I("rc: BindIndexBuffer");
95 break;
96 }
97 case RenderCommandType::COPY_BUFFER: {
98 PLUGIN_LOG_I("rc: CopyBuffer");
99 break;
100 }
101 case RenderCommandType::COPY_BUFFER_IMAGE: {
102 PLUGIN_LOG_I("rc: CopyBufferImage");
103 break;
104 }
105 case RenderCommandType::COPY_IMAGE: {
106 PLUGIN_LOG_I("rc: CopyImage");
107 break;
108 }
109 case RenderCommandType::BLIT_IMAGE: {
110 PLUGIN_LOG_I("rc: BlitImage");
111 break;
112 }
113 case RenderCommandType::BARRIER_POINT: {
114 PLUGIN_LOG_I("rc: BarrierPoint");
115 break;
116 }
117 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
118 PLUGIN_LOG_I("rc: BindDescriptorSets");
119 break;
120 }
121 case RenderCommandType::PUSH_CONSTANT: {
122 PLUGIN_LOG_I("rc: PushConstant");
123 break;
124 }
125 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
126 PLUGIN_LOG_I("rc: BuildAccelerationStructure");
127 break;
128 }
129 case RenderCommandType::CLEAR_COLOR_IMAGE: {
130 PLUGIN_LOG_I("rc: ClearColorImage");
131 break;
132 }
133
134 // dynamic states
135 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
136 PLUGIN_LOG_I("rc: DynamicStateViewport");
137 break;
138 }
139 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
140 PLUGIN_LOG_I("rc: DynamicStateScissor");
141 break;
142 }
143 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
144 PLUGIN_LOG_I("rc: DynamicStateLineWidth");
145 break;
146 }
147 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
148 PLUGIN_LOG_I("rc: DynamicStateDepthBias");
149 break;
150 }
151 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
152 PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
153 break;
154 }
155 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
156 PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
157 break;
158 }
159 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
160 PLUGIN_LOG_I("rc: DynamicStateStencil");
161 break;
162 }
163 case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
164 PLUGIN_LOG_I("rc: DynamicStateFragmentShadingRate");
165 break;
166 }
167 case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
168 PLUGIN_LOG_I("rc: ExecuteBackendFramePosition");
169 break;
170 }
171
172 case RenderCommandType::WRITE_TIMESTAMP: {
173 PLUGIN_LOG_I("rc: WriteTimestamp");
174 break;
175 }
176 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
177 PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
178 break;
179 }
180 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
181 PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
182 break;
183 }
184 case RenderCommandType::BEGIN_DEBUG_MARKER: {
185 PLUGIN_LOG_I("rc: BeginDebugMarker");
186 break;
187 }
188 case RenderCommandType::END_DEBUG_MARKER: {
189 PLUGIN_LOG_I("rc: EndDebugMarker");
190 break;
191 }
192 case RenderCommandType::UNDEFINED:
193 case RenderCommandType::COUNT: {
194 PLUGIN_ASSERT(false && "non-valid render command");
195 break;
196 }
197 }
198 }
199 }
200
DebugBarrierPrint(const GpuResourceManager & gpuResourceMgr,const vector<CommandBarrier> & combinedBarriers)201 void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
202 {
203 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
204 for (const auto& ref : combinedBarriers) {
205 const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
206 if (type == RenderHandleType::GPU_BUFFER) {
207 PLUGIN_LOG_I("barrier buffer :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
208 ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
209 ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags);
210 } else {
211 PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
212 PLUGIN_LOG_I("barrier image :: handle:0x%" PRIx64
213 " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
214 ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
215 ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags, ref.src.optionalImageLayout,
216 ref.dst.optionalImageLayout);
217 }
218 }
219 }
220 }
221
DebugRenderPassLayoutPrint(const GpuResourceManager & gpuResourceMgr,const RenderCommandBeginRenderPass & rc)222 void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
223 {
224 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
225 for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
226 const auto handle = rc.renderPassDesc.attachmentHandles[idx];
227 const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
228 const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
229 PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64
230 " name:%s, src_layout:%u dst_layout:%u (patched later)",
231 handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
232 }
233 }
234 }
235
DebugPrintImageState(const GpuResourceManager & gpuResourceMgr,const RenderGraph::RenderGraphImageState & resState)236 void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
237 {
238 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
239 // NOTE: gpuHandle might be the same when generation index wraps around
240 // and when using shallow handles (shadow -> re-use normal -> shadow -> re-use normal etc)
241 const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
242 PLUGIN_LOG_I("image_state :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
243 resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
244 resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
245 RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
246 RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
247 // one could fetch and print vulkan handle here as well e.g.
248 // 1. const GpuImagePlatformDataVk& plat =
249 // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
250 // 3. PLUGIN_LOG_I("end_frame image :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
251 }
252 }
253 #endif // RENDER_DEV_ENABLED
254
255 constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
256 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
257 CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
258 CORE_ACCESS_MEMORY_WRITE_BIT;
259
PatchRenderPassFinalLayout(const RenderHandle handle,const ImageLayout imageLayout,RenderCommandBeginRenderPass & beginRenderPass,RenderGraph::RenderGraphImageState & storeState)260 void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
261 RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
262 {
263 const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
264 for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
265 if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
266 beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
267 storeState.resource.imageLayout = imageLayout;
268 }
269 }
270 }
271
UpdateMultiRenderCommandListRenderPasses(Device & device,RenderGraph::MultiRenderPassStore & store)272 void UpdateMultiRenderCommandListRenderPasses(Device& device, RenderGraph::MultiRenderPassStore& store)
273 {
274 const auto renderPassCount = (uint32_t)store.renderPasses.size();
275 PLUGIN_ASSERT(renderPassCount > 1);
276
277 RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
278 PLUGIN_ASSERT(firstRenderPass);
279 PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
280 const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
281 PLUGIN_ASSERT(lastRenderPass);
282
283 const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;
284
285 // take attachment loads from the first one, and stores from the last one
286 // take initial layouts from the first one, and final layouts from the last one (could take the next layout)
287 // initial store the correct render pass description to first render pass and then copy to others
288 // resource states are copied from valid subpasses to another render command list subpasses
289 for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
290 const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
291 const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
292 const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
293 for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
294 if (fromRpIdx != toRpIdx) {
295 auto& toRenderPass = *(store.renderPasses[toRpIdx]);
296 auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
297 for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
298 toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
299 toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
300 }
301 }
302 }
303 }
304
305 for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
306 firstRenderPass->renderPassDesc.attachments[idx].storeOp =
307 lastRenderPass->renderPassDesc.attachments[idx].storeOp;
308 firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
309 lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;
310
311 firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
312 lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
313 }
314
315 // copy subpasses to first and mark if merging subpasses
316 bool mergeSubpasses = false;
317 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
318 firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
319 if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
320 mergeSubpasses = true;
321 }
322 }
323 // NOTE: only use merge subpasses in vulkan at the moment
324 if (device.GetBackendType() != DeviceBackendType::VULKAN) {
325 mergeSubpasses = false;
326 }
327
328 uint32_t subpassCount = renderPassCount;
329 if (mergeSubpasses) {
330 PLUGIN_ASSERT(renderPassCount > 1U);
331 // merge from back to front
332 const uint32_t finalSubpass = renderPassCount - 1U;
333 uint32_t mergeCount = 0U;
334 for (uint32_t idx = finalSubpass; idx > 0U; --idx) {
335 if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
336 PLUGIN_ASSERT(idx > 0U);
337
338 uint32_t prevSubpassIdx = idx - 1U;
339 auto& currSubpass = firstRenderPass->subpasses[idx];
340 auto& prevSubpass = firstRenderPass->subpasses[prevSubpassIdx];
341 // cannot merge in these cases
342 if (currSubpass.inputAttachmentCount != prevSubpass.inputAttachmentCount) {
343 currSubpass.subpassFlags &= SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
344 #if (RENDER_VALIDATION_ENABLED == 1)
345 PLUGIN_LOG_W(
346 "RENDER_VALIDATION: Trying to merge subpasses with input attachments, undefined results");
347 #endif
348 }
349 if (prevSubpass.resolveAttachmentCount > currSubpass.resolveAttachmentCount) {
350 currSubpass.subpassFlags &= SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
351 #if (RENDER_VALIDATION_ENABLED == 1)
352 PLUGIN_LOG_W("RENDER_VALIDATION: Trying to merge subpasses with different resolve counts, "
353 "undefined results");
354 #endif
355 }
356 if ((currSubpass.subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
357 // merge failed -> continue
358 continue;
359 }
360
361 mergeCount++;
362 auto& currRenderPass = store.renderPasses[idx];
363 const auto& currSubpassResourceStates = currRenderPass->subpassResourceStates[idx];
364 currRenderPass->subpassStartIndex = currRenderPass->subpassStartIndex - 1U;
365 // can merge
366 currSubpass.subpassFlags |= SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
367
368 auto& prevRenderPass = store.renderPasses[prevSubpassIdx];
369 auto& prevSubpassResourceStates = prevRenderPass->subpassResourceStates[prevSubpassIdx];
370 // NOTE: at the moment copies everything from the current subpass
371 CloneData(&prevSubpass, sizeof(RenderPassSubpassDesc), &currSubpass, sizeof(RenderPassSubpassDesc));
372 // copy layouts and states from the current to previous
373 for (uint32_t resourceIdx = 0U; resourceIdx < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT;
374 ++resourceIdx) {
375 prevSubpassResourceStates.layouts[resourceIdx] = currSubpassResourceStates.layouts[resourceIdx];
376 prevSubpassResourceStates.states[resourceIdx] = currSubpassResourceStates.states[resourceIdx];
377 }
378 }
379 }
380
381 // new minimal subpass count
382 subpassCount = subpassCount - mergeCount;
383 firstRenderPass->renderPassDesc.subpassCount = subpassCount;
384 firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), subpassCount };
385 // update subpass start indices
386 uint32_t subpassStartIndex = 0;
387 for (uint32_t idx = 1U; idx < renderPassCount; ++idx) {
388 auto& currRenderPass = store.renderPasses[idx];
389 if (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
390 currRenderPass->subpassStartIndex = subpassStartIndex;
391 } else {
392 subpassStartIndex++;
393 }
394 }
395 }
396
397 // copy from first to following render passes
398 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
399 // subpass start index is the only changing variables
400 auto& currRenderPass = store.renderPasses[idx];
401 const uint32_t subpassStartIndex = currRenderPass->subpassStartIndex;
402 currRenderPass->renderPassDesc = firstRenderPass->renderPassDesc;
403 // advance subpass start index if not merging
404 if (mergeSubpasses &&
405 ((idx < currRenderPass->subpasses.size()) &&
406 (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT))) {
407 // NOTE: subpassResourceStates are copied in this case
408 currRenderPass->subpassResourceStates[subpassStartIndex] =
409 firstRenderPass->subpassResourceStates[subpassStartIndex];
410 }
411 currRenderPass->subpassStartIndex = subpassStartIndex;
412 // copy all subpasses and input resource states
413 currRenderPass->subpasses = firstRenderPass->subpasses;
414 currRenderPass->inputResourceStates = firstRenderPass->inputResourceStates;
415 // image layouts needs to match
416 currRenderPass->imageLayouts = firstRenderPass->imageLayouts;
417 // NOTE: subpassResourceStates are only copied when doing merging
418 }
419 }
420
GetSrcBufferBarrier(const GpuResourceState & state,const BindableBuffer & res)421 ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
422 {
423 return {
424 state.accessFlags,
425 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
426 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
427 res.byteOffset,
428 res.byteSize,
429 };
430 }
431
GetSrcImageBarrier(const GpuResourceState & state,const BindableImage & res)432 ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
433 {
434 return {
435 state.accessFlags,
436 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
437 res.imageLayout,
438 0,
439 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
440 };
441 }
442
GetSrcImageBarrierMips(const GpuResourceState & state,const BindableImage & src,const BindableImage & dst,const RenderGraph::RenderGraphAdditionalImageState & additionalImageState)443 ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
444 const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
445 {
446 uint32_t mipLevel = 0U;
447 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
448 ImageLayout srcImageLayout = src.imageLayout;
449 if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
450 (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
451 if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
452 mipLevel = dst.mip;
453 mipCount = 1U;
454 } else {
455 mipLevel = src.mip;
456 // all mip levels
457 }
458 PLUGIN_ASSERT(additionalImageState.layouts);
459 srcImageLayout = additionalImageState.layouts[mipLevel];
460 }
461 return {
462 state.accessFlags,
463 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
464 srcImageLayout,
465 0,
466 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
467 { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
468 };
469 }
470
GetDstBufferBarrier(const GpuResourceState & state,const BindableBuffer & res)471 ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
472 {
473 return {
474 state.accessFlags,
475 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
476 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
477 res.byteOffset,
478 res.byteSize,
479 };
480 }
481
GetDstImageBarrier(const GpuResourceState & state,const BindableImage & res)482 ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
483 {
484 return {
485 state.accessFlags,
486 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
487 res.imageLayout,
488 0,
489 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
490 };
491 }
492
GetDstImageBarrierMips(const GpuResourceState & state,const BindableImage & src,const BindableImage & dst)493 ResourceBarrier GetDstImageBarrierMips(
494 const GpuResourceState& state, const BindableImage& src, const BindableImage& dst)
495 {
496 uint32_t mipLevel = 0U;
497 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
498 ImageLayout dstImageLayout = dst.imageLayout;
499 if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
500 (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
501 if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
502 mipLevel = dst.mip;
503 mipCount = 1U;
504 } else {
505 mipLevel = src.mip;
506 // all mip levels
507 }
508 }
509 return {
510 state.accessFlags,
511 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
512 dstImageLayout,
513 0,
514 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
515 { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
516 };
517 }
518
ModifyAdditionalImageState(const BindableImage & res,RenderGraph::RenderGraphAdditionalImageState & additionalStateRef)519 void ModifyAdditionalImageState(
520 const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
521 {
522 #if (RENDER_VALIDATION_ENABLED == 1)
523 // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
524 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
525 #endif
526 if (additionalStateRef.layouts) {
527 if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
528 (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
529 additionalStateRef.layouts[res.mip] = res.imageLayout;
530 } else {
531 // set layout for all mips
532 for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
533 additionalStateRef.layouts[idx] = res.imageLayout;
534 }
535 }
536 } else {
537 #if (RENDER_VALIDATION_ENABLED == 1)
538 PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
539 #endif
540 }
541 }
542
GetQueueOwnershipTransferBarrier(const RenderHandle handle,const GpuQueue & srcGpuQueue,const GpuQueue & dstGpuQueue,const ImageLayout srcImageLayout,const ImageLayout dstImageLayout)543 CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
544 const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
545 {
546 return {
547 handle,
548
549 ResourceBarrier {
550 0,
551 PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
552 srcImageLayout,
553 0,
554 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
555 ImageSubresourceRange {},
556 },
557 srcGpuQueue,
558
559 ResourceBarrier {
560 0,
561 PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
562 dstImageLayout,
563 0,
564 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
565 ImageSubresourceRange {},
566 },
567 dstGpuQueue,
568 };
569 }
570
PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)571 void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
572 array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
573 {
574 for (const auto& transferRef : currNodeGpuResourceTransfers) {
575 PLUGIN_ASSERT(transferRef.acquireNodeIdx < (uint32_t)frameRenderNodeContextData.size());
576 if (transferRef.acquireNodeIdx >= frameRenderNodeContextData.size()) {
577 // skip
578 continue;
579 }
580
581 auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
582 const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
583 GpuQueue releaseGpuQueue = acquireGpuQueue;
584
585 if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
586 auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
587 releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
588 }
589
590 const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
591 acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);
592
593 // release ownership (NOTE: not done for previous frame)
594 if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
595 auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
596 const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
597 const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
598 PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
599 const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
600 PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
601
602 const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
603 releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });
604
605 // inform that we are patching valid barriers
606 releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
607 }
608 // acquire ownership
609 {
610 const uint32_t rcIndex = 0;
611 const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
612 PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
613 const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
614 PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
615
616 const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
617 acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });
618
619 // inform that we are patching valid barriers
620 acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
621 }
622 }
623 }
624
CheckForBarrierNeed(const unordered_map<RenderHandle,uint32_t> & handledCustomBarriers,const uint32_t customBarrierCount,const RenderHandle handle)625 bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
626 const uint32_t customBarrierCount, const RenderHandle handle)
627 {
628 bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
629 if ((customBarrierCount > 0) && needsBarrier) {
630 needsBarrier = (handledCustomBarriers.count(handle) == 0);
631 }
632 return needsBarrier;
633 }
634 } // namespace
635
RenderGraph(Device & device)636 RenderGraph::RenderGraph(Device& device)
637 : device_(device), gpuResourceMgr_((GpuResourceManager&)device.GetGpuResourceManager())
638 {}
639
BeginFrame()640 void RenderGraph::BeginFrame()
641 {
642 stateCache_.multiRenderPassStore.renderPasses.clear();
643 stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
644 stateCache_.multiRenderPassStore.supportOpen = false;
645 stateCache_.nodeCounter = 0u;
646 stateCache_.checkForBackbufferDependency = false;
647 stateCache_.usesSwapchainImage = false;
648 }
649
ProcessRenderNodeGraph(const bool checkBackbufferDependancy,const array_view<RenderNodeGraphNodeStore * > renderNodeGraphNodeStores)650 void RenderGraph::ProcessRenderNodeGraph(
651 const bool checkBackbufferDependancy, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
652 {
653 stateCache_.checkForBackbufferDependency = checkBackbufferDependancy;
654
655 // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
656 // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
657 // handles:
658 // gpu images in descriptor sets, render passes, blits, and custom barriers
659 // gpu buffers in descriptor sets, and custom barriers
660
661 {
662 // remove resources that will not be tracked anymore and release available slots
663 const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
664 for (const auto& handle : stateResetData.resources) {
665 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
666 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
667 if ((handleType == RenderHandleType::GPU_IMAGE) &&
668 (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
669 if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
670 PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
671 gpuImageTracking_[dataIdx] = {}; // reset
672 gpuImageAvailableIndices_.push_back(dataIdx);
673 }
674 gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
675 } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
676 if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
677 PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
678 gpuBufferTracking_[dataIdx] = {}; // reset
679 gpuBufferAvailableIndices_.push_back(dataIdx);
680 }
681 gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
682 }
683 }
684 }
685
686 gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
687 gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);
688
689 #if (RENDER_DEV_ENABLED == 1)
690 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
691 CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
692 static uint64_t debugFrame = 0;
693 debugFrame++;
694 PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
695 }
696 #endif
697
698 // need to store some of the resource for frame state in undefined state (i.e. reset on frame boundaries)
699 ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);
700
701 // store final state for next frame
702 StoreFinalBufferState();
703 StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
704 }
705
GetSwapchainResourceStates() const706 RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
707 {
708 return swapchainStates_;
709 }
710
ProcessRenderNodeGraphNodeStores(const array_view<RenderNodeGraphNodeStore * > & renderNodeGraphNodeStores,StateCache & stateCache)711 void RenderGraph::ProcessRenderNodeGraphNodeStores(
712 const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
713 {
714 for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
715 PLUGIN_ASSERT(graphStore);
716 if (!graphStore) {
717 continue;
718 }
719
720 for (uint32_t nodeIdx = 0; nodeIdx < (uint32_t)graphStore->renderNodeContextData.size(); ++nodeIdx) {
721 auto& ref = graphStore->renderNodeContextData[nodeIdx];
722 ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
723 stateCache.usesSwapchainImage = false; // reset
724
725 #if (RENDER_DEV_ENABLED == 1)
726 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
727 PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
728 }
729 #endif
730
731 if (stateCache.multiRenderPassStore.supportOpen && (stateCache.multiRenderPassStore.renderPasses.empty())) {
732 PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
733 // NOTE: add more error handling and invalidate render command lists
734 }
735 stateCache.multiRenderPassStore.supportOpen = ref.renderCommandList->HasMultiRenderCommandListSubpasses();
736 array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
737 // go through commands that affect or need transitions and barriers
738 ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);
739
740 // needs backbuffer/swapchain wait
741 if (stateCache.usesSwapchainImage) {
742 ref.submitInfo.waitForSwapchainAcquireSignal = true;
743 }
744
745 // patch gpu resource queue transfers
746 if (!currNodeGpuResourceTransfers_.empty()) {
747 PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
748 // clear for next use
749 currNodeGpuResourceTransfers_.clear();
750 }
751
752 stateCache_.nodeCounter++;
753 }
754 }
755 }
756
ProcessRenderNodeCommands(array_view<const RenderCommandWithType> & cmdListRef,const uint32_t & nodeIdx,RenderNodeContextData & ref,StateCache & stateCache)757 void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
758 const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
759 {
760 for (uint32_t listIdx = 0; listIdx < (uint32_t)cmdListRef.size(); ++listIdx) {
761 auto& cmdRef = cmdListRef[listIdx];
762
763 #if (RENDER_DEV_ENABLED == 1)
764 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
765 DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
766 }
767 #endif
768
769 // most of the commands are handled within BarrierPoint
770 switch (cmdRef.type) {
771 case RenderCommandType::BARRIER_POINT:
772 RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
773 break;
774
775 case RenderCommandType::BEGIN_RENDER_PASS:
776 RenderCommand(
777 nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
778 break;
779
780 case RenderCommandType::END_RENDER_PASS:
781 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
782 break;
783
784 case RenderCommandType::NEXT_SUBPASS:
785 case RenderCommandType::DRAW:
786 case RenderCommandType::DRAW_INDIRECT:
787 case RenderCommandType::DISPATCH:
788 case RenderCommandType::DISPATCH_INDIRECT:
789 case RenderCommandType::BIND_PIPELINE:
790 case RenderCommandType::BIND_VERTEX_BUFFERS:
791 case RenderCommandType::BIND_INDEX_BUFFER:
792 case RenderCommandType::COPY_BUFFER:
793 case RenderCommandType::COPY_BUFFER_IMAGE:
794 case RenderCommandType::COPY_IMAGE:
795 case RenderCommandType::BIND_DESCRIPTOR_SETS:
796 case RenderCommandType::PUSH_CONSTANT:
797 case RenderCommandType::BLIT_IMAGE:
798 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
799 case RenderCommandType::CLEAR_COLOR_IMAGE:
800 case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
801 case RenderCommandType::DYNAMIC_STATE_SCISSOR:
802 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
803 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
804 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
805 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
806 case RenderCommandType::DYNAMIC_STATE_STENCIL:
807 case RenderCommandType::WRITE_TIMESTAMP:
808 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
809 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
810 case RenderCommandType::UNDEFINED:
811 default: {
812 // nop
813 break;
814 }
815 }
816 } // end command for
817 }
818
StoreFinalBufferState()819 void RenderGraph::StoreFinalBufferState()
820 {
821 for (auto& ref : gpuBufferTracking_) {
822 if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
823 ref = {};
824 continue;
825 }
826 // NOTE: we cannot soft reset here
827 // if we do so some buffer usage might overlap in the next frame
828 if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
829 // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
830 const RenderHandle handle = ref.resource.handle;
831 ref = {};
832 ref.resource.handle = handle;
833 }
834
835 // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
836 // frames)
837 ref.prevRenderNodeIndex = { ~0u };
838 }
839 }
840
StoreFinalImageState()841 void RenderGraph::StoreFinalImageState()
842 {
843 swapchainStates_ = {}; // reset
844
845 #if (RENDER_DEV_ENABLED == 1)
846 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
847 PLUGIN_LOG_I("end_frame image_state:");
848 }
849 #endif
850 for (auto& ref : gpuImageTracking_) {
851 // if resource is not dynamic, we do not track and care
852 if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
853 ref = {};
854 continue;
855 }
856 // handle automatic presentation layout
857 if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
858 if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
859 RenderCommandBeginRenderPass& beginRenderPass =
860 *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
861 PatchRenderPassFinalLayout(
862 ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
863 }
864 // NOTE: currently we handle automatic presentation layout in vulkan backend if not in render pass
865 // store final state for backbuffer
866 // currently we only swapchains if they are really in use in this frame
867 const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
868 if (flags != 0) {
869 swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
870 }
871 }
872 #if (RENDER_DEV_ENABLED == 1)
873 // print before reset for next frame
874 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
875 DebugPrintImageState(gpuResourceMgr_, ref);
876 }
877 #endif
878 // shallow resources are not tracked
879 // they are always in undefined state in the beging of the frame
880 if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
881 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
882 // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
883 const RenderHandle handle = ref.resource.handle;
884 ref = {};
885 ref.resource.handle = handle;
886 if (addMips) {
887 PLUGIN_ASSERT(!ref.additionalState.layouts);
888 ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
889 }
890 } else {
891 // NOTE: render pass compatibility hashing with stages and access flags
892 // creates quite many new graphics pipelines in the first few frames
893 // do soft reset here to prevent access flags from previous frame
894 // NOTE: in theory this soft reset might create overlap of rendering to a same target
895 ref.state.accessFlags = 0;
896 ref.state.pipelineStageFlags = 0;
897 ref.state.shaderStageFlags = 0;
898 }
899
900 // need to reset per frame variables for all images (so we do not try to patch from previous frames)
901 ref.prevRc = {};
902 ref.prevRenderNodeIndex = { ~0u };
903 }
904 }
905
RenderCommand(const uint32_t renderNodeIndex,const uint32_t commandListCommandIndex,RenderNodeContextData & nodeData,RenderCommandBeginRenderPass & rc,StateCache & stateCache)906 void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
907 RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
908 {
909 // update layouts for attachments to gpu image state
910 BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };
911
912 PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);
913
914 const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
915 if (hasRenderPassDependency) { // stitch render pass subpasses
916 BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
917 }
918
919 const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();
920
921 auto finalImageLayouts =
922 array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));
923
924 BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);
925
926 for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
927 const auto& subpassRef = rc.subpasses[subpassIdx];
928 const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];
929
930 BeginRenderPassUpdateSubpassImageStates(
931 array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
932 subpassResourceStatesRef, finalImageLayouts);
933
934 BeginRenderPassUpdateSubpassImageStates(
935 array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
936 subpassResourceStatesRef, finalImageLayouts);
937
938 BeginRenderPassUpdateSubpassImageStates(
939 array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
940 subpassResourceStatesRef, finalImageLayouts);
941
942 if (subpassRef.depthAttachmentCount == 1u) {
943 BeginRenderPassUpdateSubpassImageStates(
944 array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
945 subpassResourceStatesRef, finalImageLayouts);
946 if (subpassRef.depthResolveAttachmentCount == 1) {
947 BeginRenderPassUpdateSubpassImageStates(
948 array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
949 rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
950 }
951 }
952 if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
953 BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
954 subpassRef.fragmentShadingRateAttachmentCount),
955 rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
956 }
957 }
958
959 if (hasRenderPassDependency) { // stitch render pass subpasses
960 if (rc.subpassStartIndex > 0) {
961 // stitched to behave as a nextSubpass() and not beginRenderPass()
962 rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
963 }
964 const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
965 if (finalSubpass) {
966 UpdateMultiRenderCommandListRenderPasses(device_, stateCache.multiRenderPassStore);
967 // multiRenderPassStore cleared in EndRenderPass
968 }
969 }
970 #if (RENDER_DEV_ENABLED == 1)
971 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
972 DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
973 }
974 #endif
975 }
976
BeginRenderPassHandleDependency(BeginRenderPassParameters & params,const uint32_t commandListCommandIndex,RenderNodeContextData & nodeData)977 void RenderGraph::BeginRenderPassHandleDependency(
978 BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
979 {
980 params.stateCache.multiRenderPassStore.renderPasses.push_back(¶ms.rc);
981 // store the first begin render pass
982 params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
983 params.stateCache.multiRenderPassStore.renderPasses[0] };
984
985 if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
986 #ifndef NDEBUG
987 // barrier point must be previous command
988 PLUGIN_ASSERT(commandListCommandIndex >= 1);
989 const uint32_t prevCommandIndex = commandListCommandIndex - 1;
990 const RenderCommandWithType& barrierPointCmdRef =
991 nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
992 PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
993 PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));
994 #endif
995 params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
996 }
997 }
998
BeginRenderPassUpdateImageStates(BeginRenderPassParameters & params,const GpuQueue & gpuQueue,array_view<ImageLayout> & finalImageLayouts,const uint32_t renderNodeIndex)999 void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
1000 array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
1001 {
1002 auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
1003 const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
1004 auto& attachments = params.rc.renderPassDesc.attachments;
1005 auto& attachmentInputResourceStates = params.rc.inputResourceStates;
1006
1007 for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
1008 const RenderHandle handle = attachmentHandles[attachmentIdx];
1009 // NOTE: invalidate invalid handle commands already in render command list
1010 if (!RenderHandleUtil::IsGpuImage(handle)) {
1011 #ifdef _DEBUG
1012 PLUGIN_LOG_E("invalid handle in render node graph");
1013 #endif
1014 continue;
1015 }
1016 auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
1017 ImageLayout imgLayout = stateRef.resource.imageLayout;
1018
1019 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
1020 // image layout is undefined if automatic barriers have been disabled
1021 if (params.rc.enableAutomaticLayoutChanges) {
1022 const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
1023 if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
1024 if (stateRef.additionalState.layouts) {
1025 imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
1026 } else {
1027 #if (RENDER_VALIDATION_ENABLED == 1)
1028 PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
1029 #endif
1030 }
1031 }
1032
1033 initialImageLayouts[attachmentIdx] = imgLayout;
1034 }
1035 // undefined layout with load_op_load -> we modify to dont_care (and remove validation warning)
1036 if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
1037 (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
1038 // dont care (user needs to be sure what is wanted, i.e. in first frame one should clear)
1039 attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
1040 }
1041 finalImageLayouts[attachmentIdx] = imgLayout;
1042 attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
1043 attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;
1044
1045 // store render pass for final layout patching
1046 stateRef.prevRc = params.rpForCmdRef;
1047 stateRef.prevRenderNodeIndex = renderNodeIndex;
1048
1049 // flag for backbuffer use
1050 if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
1051 params.stateCache.usesSwapchainImage = true;
1052 }
1053 }
1054 }
1055
BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attatchmentIndices,const RenderPassDesc & renderPassDesc,const RenderPassAttachmentResourceStates & subpassResourceStatesRef,array_view<ImageLayout> finalImageLayouts)1056 void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attatchmentIndices,
1057 const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
1058 array_view<ImageLayout> finalImageLayouts)
1059 {
1060 for (const uint32_t attachmentIndex : attatchmentIndices) {
1061 // NOTE: handle invalid commands already in render command list and invalidate draws etc.
1062 PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
1063 const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
1064 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
1065 const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
1066 const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
1067 // NOTE: we should support non dynamicity and GENERAL
1068
1069 finalImageLayouts[attachmentIndex] = refImgLayout;
1070 auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
1071 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
1072
1073 ref.state = refState;
1074 ref.resource.handle = handle;
1075 ref.resource.imageLayout = refImgLayout;
1076 if (addMips) {
1077 const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
1078 const BindableImage image {
1079 handle,
1080 attachmentDesc.mipLevel,
1081 attachmentDesc.layer,
1082 refImgLayout,
1083 RenderHandle {},
1084 };
1085 ModifyAdditionalImageState(image, ref.additionalState);
1086 }
1087 }
1088 }
1089
RenderCommand(RenderCommandEndRenderPass & rc,StateCache & stateCache)1090 void RenderGraph::RenderCommand(RenderCommandEndRenderPass& rc, StateCache& stateCache)
1091 {
1092 const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
1093 if (hasRenderPassDependency) {
1094 const bool finalSubpass = (rc.subpassCount == (uint32_t)stateCache.multiRenderPassStore.renderPasses.size());
1095 if (finalSubpass) {
1096 if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
1097 PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
1098 // NOTE: add more error handling and invalidate render command lists
1099 }
1100 rc.endType = RenderPassEndType::END_RENDER_PASS;
1101 stateCache.multiRenderPassStore.renderPasses.clear();
1102 stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
1103 stateCache.multiRenderPassStore.supportOpen = false;
1104 } else {
1105 rc.endType = RenderPassEndType::END_SUBPASS;
1106 }
1107 }
1108 }
1109
RenderCommand(const uint32_t renderNodeIndex,const uint32_t commandListCommandIndex,RenderNodeContextData & nodeData,RenderCommandBarrierPoint & rc,StateCache & stateCache)1110 void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
1111 RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
1112 {
1113 // go through required descriptors for current upcoming event
1114 const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
1115 const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
1116 const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
1117 const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;
1118
1119 parameterCachePools_.combinedBarriers.clear();
1120 parameterCachePools_.handledCustomBarriers.clear();
1121 ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
1122 rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
1123 nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
1124 // first check custom barriers
1125 if (parameters.customBarrierCount > 0) {
1126 HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
1127 }
1128 // then vertex / index buffer barriers in the barrier point before render pass
1129 if (parameters.vertexInputBarrierCount > 0) {
1130 PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
1131 HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
1132 nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
1133 }
1134 if (parameters.indirectBufferBarrierCount > 0U) {
1135 PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
1136 HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
1137 nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
1138 }
1139
1140 // in barrier point the next render command is known for which the barrier is needed
1141 if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
1142 HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
1143 } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
1144 HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
1145 } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
1146 HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
1147 } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
1148 HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
1149 } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
1150 HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image
1151 } else { // descriptor sets
1152 if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
1153 HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
1154 }
1155 const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
1156 const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
1157 const uint32_t descriptorSetHandleMaxIndex =
1158 Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
1159 const auto descriptorSetHandlesForBarriers =
1160 array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
1161 allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
1162 HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
1163 }
1164
1165 if (!parameters.combinedBarriers.empty()) {
1166 // use first render pass barrier point with following subpasses
1167 // firstRenderPassBarrierPoint is null for the first subpass
1168 const bool renderPassHasDependancy = stateCache.multiRenderPassStore.supportOpen;
1169 if (renderPassHasDependancy && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
1170 PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
1171 stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
1172 rc.barrierPointIndex, parameters.combinedBarriers);
1173 } else {
1174 nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
1175 }
1176 }
1177 #if (RENDER_DEV_ENABLED == 1)
1178 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
1179 DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
1180 }
1181 #endif
1182 }
1183
UpdateBufferResourceState(RenderGraphBufferState & stateRef,const ParameterCache & params,const CommandBarrier & cb)1184 inline void RenderGraph::UpdateBufferResourceState(
1185 RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
1186 {
1187 stateRef.resource.handle = cb.resourceHandle;
1188 stateRef.state.shaderStageFlags = 0;
1189 stateRef.state.accessFlags = cb.dst.accessFlags;
1190 stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
1191 stateRef.state.gpuQueue = params.gpuQueue;
1192 stateRef.prevRenderNodeIndex = params.renderNodeIndex;
1193 }
1194
UpdateImageResourceState(RenderGraphImageState & stateRef,const ParameterCache & params,const CommandBarrier & cb)1195 inline void RenderGraph::UpdateImageResourceState(
1196 RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
1197 {
1198 stateRef.resource.handle = cb.resourceHandle;
1199 stateRef.state.shaderStageFlags = 0;
1200 stateRef.state.accessFlags = cb.dst.accessFlags;
1201 stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
1202 stateRef.state.gpuQueue = params.gpuQueue;
1203 stateRef.prevRc = params.rcWithType;
1204 stateRef.prevRenderNodeIndex = params.renderNodeIndex;
1205 }
1206
HandleCustomBarriers(ParameterCache & params,const uint32_t barrierIndexBegin,const array_view<const CommandBarrier> & customBarrierListRef)1207 void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1208 const array_view<const CommandBarrier>& customBarrierListRef)
1209 {
1210 params.handledCustomBarriers.reserve(params.customBarrierCount);
1211 PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
1212 for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
1213 end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
1214 begin != end; ++begin) {
1215 // add a copy and modify if needed
1216 auto& cb = params.combinedBarriers.emplace_back(*begin);
1217
1218 // NOTE: undefined type is for non-resource memory/pipeline barriers
1219 const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
1220 const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
1221 PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
1222 (type == RenderHandleType::GPU_IMAGE));
1223 if (type == RenderHandleType::GPU_BUFFER) {
1224 if (isDynamicTrack) {
1225 auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
1226 UpdateBufferResourceState(stateRef, params, cb);
1227 }
1228 params.handledCustomBarriers[cb.resourceHandle] = 0;
1229 } else if (type == RenderHandleType::GPU_IMAGE) {
1230 if (isDynamicTrack) {
1231 const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
1232 auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
1233 if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
1234 uint32_t mipLevel = 0U;
1235 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
1236 ImageLayout srcImageLayout = stateRef.resource.imageLayout;
1237 if (isAddMips) {
1238 const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
1239 const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
1240 if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
1241 (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
1242 if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
1243 mipLevel = dstMip;
1244 mipCount = 1U;
1245 } else {
1246 mipLevel = srcMip;
1247 // all mip levels
1248 }
1249 if (stateRef.additionalState.layouts) {
1250 srcImageLayout = stateRef.additionalState.layouts[mipLevel];
1251 } else {
1252 #if (RENDER_VALIDATION_ENABLED == 1)
1253 PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
1254 #endif
1255 }
1256 }
1257 }
1258 cb.src.accessFlags = stateRef.state.accessFlags;
1259 cb.src.pipelineStageFlags =
1260 stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1261 cb.src.optionalImageLayout = srcImageLayout;
1262 cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
1263 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
1264 }
1265 UpdateImageResourceState(stateRef, params, cb);
1266 stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
1267 if (isAddMips) {
1268 const BindableImage image {
1269 cb.resourceHandle,
1270 cb.dst.optionalImageSubresourceRange.baseMipLevel,
1271 cb.dst.optionalImageSubresourceRange.baseArrayLayer,
1272 cb.dst.optionalImageLayout,
1273 RenderHandle {},
1274 };
1275 ModifyAdditionalImageState(image, stateRef.additionalState);
1276 }
1277 }
1278 params.handledCustomBarriers[cb.resourceHandle] = 0;
1279 }
1280 }
1281 }
1282
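// Adds barriers for vertex and index buffer bindings of the render node: each buffer is moved to
// vertex input read state (index / vertex attribute read in the vertex input stage).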
1283 void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1284 const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
1285 {
1286 for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
1287 const uint32_t barrierIndex = barrierIndexBegin + idx;
1288 PLUGIN_ASSERT(barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size());
1289 if (barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size()) {
1290 const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
1291 const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
1292 CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
1293 CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
1294 UpdateStateAndCreateBarriersGpuBuffer(
1295 resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
1296 }
1297 }
1298 }
1299
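// Adds barriers for indirect argument buffers used within a render pass, transitioning them to
// indirect command read state unless a custom barrier already covers the buffer.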
1300 void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1301 const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
1302 {
1303 for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
1304 const uint32_t barrierIndex = barrierIndexBegin + idx;
1305 PLUGIN_ASSERT(barrierIndex < (uint32_t)indirectBufferBarrierListRef.size());
1306 if (barrierIndex < (uint32_t)indirectBufferBarrierListRef.size()) {
1307 const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
1308 const bool needsArgsBarrier =
1309 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
1310 if (needsArgsBarrier) {
1311 const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
1312 CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
1313 UpdateStateAndCreateBarriersGpuBuffer(
1314 resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
1315 }
1316 }
1317 }
1318 }
1319
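// The next command in the list is expected to be CLEAR_COLOR_IMAGE; transitions the target image
// to transfer write state unless a custom barrier already covers it.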
1320 void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1321 const array_view<const RenderCommandWithType>& cmdListRef)
1322 {
1323 const uint32_t nextListIdx = commandListCommandIndex + 1;
1324 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1325 const auto& nextCmdRef = cmdListRef[nextListIdx];
1326 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);
1327
1328 const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);
1329
1330 const bool needsBarrier =
1331 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
1332 if (needsBarrier) {
1333 BindableImage bRes = {};
1334 bRes.handle = nextRc.handle;
1335 bRes.imageLayout = nextRc.imageLayout;
1336 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1337 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1338 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1339 }
1340 }
1341
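// The next command in the list is expected to be BLIT_IMAGE; transitions the source image to
// transfer read and the destination image to transfer write state unless custom barriers already
// cover them.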
1342 void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1343 const array_view<const RenderCommandWithType>& cmdListRef)
1344 {
1345 const uint32_t nextListIdx = commandListCommandIndex + 1;
1346 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1347 const auto& nextCmdRef = cmdListRef[nextListIdx];
1348 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);
1349
1350 const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);
1351
1352 const bool needsSrcBarrier =
1353 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
1354 if (needsSrcBarrier) {
1355 BindableImage bRes = {};
1356 bRes.handle = nextRc.srcHandle;
1357 bRes.imageLayout = nextRc.srcImageLayout;
1358 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1359 GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1360 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1361 }
1362
1363 const bool needsDstBarrier =
1364 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
1365 if (needsDstBarrier) {
1366 BindableImage bRes = {};
1367 bRes.handle = nextRc.dstHandle;
1368 bRes.imageLayout = nextRc.dstImageLayout;
1369 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1370 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1371 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1372 }
1373 }
1374
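// The next command in the list is expected to be COPY_BUFFER; adds transfer read / transfer write
// barriers for the source and destination buffers unless custom barriers already cover them.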
1375 void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
1376 const array_view<const RenderCommandWithType>& cmdListRef)
1377 {
1378 const uint32_t nextListIdx = commandListCommandIndex + 1;
1379 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1380 const auto& nextCmdRef = cmdListRef[nextListIdx];
1381 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);
1382
1383 const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);
1384
1385 const bool needsSrcBarrier =
1386 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
1387 if (needsSrcBarrier) {
1388 const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
1389 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1390 GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1391 bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
1392 }
1393
1394 const bool needsDstBarrier =
1395 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
1396 if (needsDstBarrier) {
1397 const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
1398 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1399 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1400 bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
1401 }
1402 }
1403
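// The next command in the list is expected to be COPY_BUFFER_IMAGE or COPY_IMAGE. Depending on the
// handle type, the source gets a transfer read barrier (buffer or image) and the destination a
// transfer write barrier, with image layouts set to TRANSFER_SRC/DST_OPTIMAL.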
1404 void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1405 const array_view<const RenderCommandWithType>& cmdListRef)
1406 {
1407 const uint32_t nextListIdx = commandListCommandIndex + 1;
1408 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1409 const auto& nextCmdRef = cmdListRef[nextListIdx];
1410 PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
1411 (nextCmdRef.type == RenderCommandType::COPY_IMAGE));
1412
1413     // NOTE: two different command types are supported here
1414 RenderHandle srcHandle;
1415 RenderHandle dstHandle;
1416 ImageSubresourceLayers srcImgLayers;
1417 ImageSubresourceLayers dstImgLayers;
1418 if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
1419 const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
1420 PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1421 srcHandle = nextRc.srcHandle;
1422 dstHandle = nextRc.dstHandle;
1423 srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
1424 dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
1425 } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
1426 const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
1427 srcHandle = nextRc.srcHandle;
1428 dstHandle = nextRc.dstHandle;
1429 srcImgLayers = nextRc.imageCopy.srcSubresource;
1430 dstImgLayers = nextRc.imageCopy.dstSubresource;
1431 }
1432
1433 const bool needsSrcBarrier =
1434 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
1435 if (needsSrcBarrier) {
1436 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
1437 PLUGIN_UNUSED(handleType);
1438 PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
1439 if (handleType == RenderHandleType::GPU_BUFFER) {
1440 BindableBuffer bRes;
1441 bRes.handle = srcHandle;
1442 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1443 GpuResourceState {
1444 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1445 bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
1446 } else {
1447 BindableImage bRes;
1448 bRes.handle = srcHandle;
1449 bRes.mip = srcImgLayers.mipLevel;
1450 bRes.layer = srcImgLayers.baseArrayLayer;
1451 bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1452 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1453 GpuResourceState {
1454 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1455 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1456 }
1457 }
1458
1459 const bool needsDstBarrier =
1460 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
1461 if (needsDstBarrier) {
1462 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
1463 PLUGIN_UNUSED(handleType);
1464 PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
1465 if (handleType == RenderHandleType::GPU_BUFFER) {
1466 BindableBuffer bRes;
1467 bRes.handle = dstHandle;
1468 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1469 GpuResourceState {
1470 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1471 bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
1472 } else {
1473 BindableImage bRes;
1474 bRes.handle = dstHandle;
1475 bRes.mip = dstImgLayers.mipLevel;
1476 bRes.layer = dstImgLayers.baseArrayLayer;
1477 bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1478 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1479 GpuResourceState {
1480 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1481 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1482 }
1483 }
1484 }
1485
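// The next command in the list is expected to be DISPATCH_INDIRECT; transitions the argument
// buffer to indirect command read state unless a custom barrier already covers it.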
1486 void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
1487 const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
1488 {
1489 const uint32_t nextListIdx = commandListCommandIndex + 1;
1490 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1491 const auto& nextCmdRef = cmdListRef[nextListIdx];
1492 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);
1493
1494 const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);
1495
1496 const bool needsArgsBarrier =
1497 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
1498 if (needsArgsBarrier) {
1499 const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
1500 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1501 GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
1502 CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
1503 bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
1504 }
1505 }
1506
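// Walks the descriptor sets bound for this barrier point and creates buffer/image barriers for the
// bound resources. Global descriptor sets without dynamic barrier resources are skipped. Array
// bindings are expanded: the first element is the binding itself, the rest are read via the
// binding's array offset.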
1507 void RenderGraph::HandleDescriptorSets(ParameterCache& params,
1508 const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
1509 const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
1510 {
1511 for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
1512 if (RenderHandleUtil::GetHandleType(descriptorSetHandle) != RenderHandleType::DESCRIPTOR_SET) {
1513 continue;
1514 }
1515
1516         // NOTE: for global descriptor sets it is not known at render command list recording time whether the set has dynamic resources
1517 const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(descriptorSetHandle);
1518 if (additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) {
1519 if (!nodeDescriptorSetMgrRef.HasDynamicBarrierResources(descriptorSetHandle)) {
1520 continue;
1521 }
1522 }
1523
1524 const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
1525 const auto& buffers = bindingResources.buffers;
1526 const auto& images = bindingResources.images;
1527 for (const auto& refBuf : buffers) {
1528 const auto& ref = refBuf.desc;
1529 const uint32_t descriptorCount = ref.binding.descriptorCount;
1530             // skip array binding elements which are bound via the first index; they have descriptorCount 0
1531 if (descriptorCount == 0) {
1532 continue;
1533 }
1534 const uint32_t arrayOffset = ref.arrayOffset;
1535 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1536 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1537                 // index 0 is the binding itself; from index 1 onwards the array offsets are used
1538 const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1].desc;
1539                 if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
1540 UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
1541 }
1542 }
1543 }
1544 for (const auto& refImg : images) {
1545 const auto& ref = refImg.desc;
1546 const uint32_t descriptorCount = ref.binding.descriptorCount;
1547             // skip array binding elements which are bound via the first index; they have descriptorCount 0
1548 if (descriptorCount == 0) {
1549 continue;
1550 }
1551 const uint32_t arrayOffset = ref.arrayOffset;
1552 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
1553 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1554                 // index 0 is the binding itself; from index 1 onwards the array offsets are used
1555 const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1].desc;
1556 if (CheckForBarrierNeed(
1557 params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
1558 UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
1559 }
1560 }
1561 }
1562 } // end for
1563 }
1564
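// Creates an image barrier when the image layout changes or when either the previous or the new
// state has write access; input attachments are excluded as they are handled by render passes.
// A queue type change is recorded as a GpuQueueTransferState instead of a direct barrier.
// The tracked image state is updated afterwards.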
1565 void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
1566 const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
1567 {
1568 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
1569 if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
1570 return;
1571 }
1572
1573 auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
1574     // NOTE: we previously patched the final render pass layouts here
1575     // ATM: we only patch the swapchain image if needed
1576
1577 const GpuResourceState& prevState = ref.state;
1578 const BindableImage& prevImage = ref.resource;
1579 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
1580 const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
1581 : GetSrcImageBarrier(prevState, prevImage);
1582
1583 const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
1584     // NOTE: only write access is of interest here, the other access flags do not force a barrier
1585     // (i.e. a barrier is added when prevStateRb.accessFlags or state.accessFlags contains write bits)
1586 const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (state.accessFlags & WRITE_ACCESS_FLAGS);
1587 const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
1588 // input attachments are handled with render passes and not with barriers
1589 if ((layoutChanged || writeTarget) && (!inputAttachment)) {
1590 if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1591 (prevState.gpuQueue.type != state.gpuQueue.type)) {
1592 PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1593
1594 PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
1595 currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
1596 res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
1597 } else {
1598 const ResourceBarrier dstImageBarrier =
1599 addMips ? GetDstImageBarrierMips(state, prevImage, res) : GetDstImageBarrier(state, res);
1600 params.combinedBarriers.push_back(
1601 CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
1602 }
1603
1604 ref.state = state;
1605 ref.resource = res;
1606 ref.prevRc = params.rcWithType;
1607 ref.prevRenderNodeIndex = params.renderNodeIndex;
1608 if (addMips) {
1609 ModifyAdditionalImageState(res, ref.additionalState);
1610 }
1611 }
1612 }
1613
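// Creates a buffer barrier when the previous or the new state has write access and updates the
// tracked buffer state to match the destination state.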
1614 void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
1615 const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
1616 {
1617 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
1618 if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
1619 return;
1620 }
1621
1622 // get the current state of the buffer
1623 auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
1624 const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
1625 // if previous or current state is write -> barrier
1626 if ((prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (dstState.accessFlags & WRITE_ACCESS_FLAGS)) {
1627 params.combinedBarriers.push_back(CommandBarrier {
1628 res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
1629 }
1630
1631 // update the cached state to match the situation after the barrier
1632 srcStateRef.state = dstState;
1633 srcStateRef.resource = res;
1634 srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
1635 }
1636
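// Adds a barrier for the buffer, or records a queue ownership transfer when the source and
// destination queue types differ, and updates the tracked buffer state to the new state.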
1637 void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
1638 const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer, vector<CommandBarrier>& barriers,
1639 vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
1640 {
1641 auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
1642 const GpuResourceState srcState = stateRef.state;
1643 const BindableBuffer srcBuffer = stateRef.resource;
1644
1645 if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1646 (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
1647 PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1648         PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
1649 PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
1650 currNodeGpuResourceTransfer.push_back(
1651 RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
1652 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
1653 } else {
1654 const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
1655 const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);
1656
1657 barriers.push_back(CommandBarrier {
1658 newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
1659 }
1660
1661 stateRef.state = newGpuResourceState;
1662 stateRef.resource = newBuffer;
1663 stateRef.prevRenderNodeIndex = renderNodeIndex;
1664 }
1665
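// Adds a barrier for the image, or records a queue ownership transfer when the source and
// destination queue types differ. Per-mip layouts are taken into account for resources with
// additional state tracking, and the tracked image state is updated to the new state.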
1666 void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
1667 const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
1668 vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
1669 {
1670 // newGpuResourceState has queue transfer image layout in old optionalImageLayout
1671
1672 auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
1673 const GpuResourceState srcState = stateRef.state;
1674 const BindableImage srcImage = stateRef.resource;
1675 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);
1676
1677 if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1678 (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
1679 PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1680 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
1681 PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
1682 currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
1683 stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
1684 } else {
1685 const ResourceBarrier srcBarrier =
1686 addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
1687 : GetSrcImageBarrier(srcState, srcImage);
1688 const ResourceBarrier dstBarrier = addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage)
1689 : GetDstImageBarrier(newGpuResourceState, newImage);
1690
1691 barriers.push_back(CommandBarrier {
1692 newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
1693 }
1694
1695 stateRef.state = newGpuResourceState;
1696 stateRef.resource = newImage;
1697 stateRef.prevRc = rcWithType;
1698 stateRef.prevRenderNodeIndex = renderNodeIndex;
1699 if (addMips) {
1700 ModifyAdditionalImageState(newImage, stateRef.additionalState);
1701 }
1702 }
1703
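// Returns the tracked state of a dynamic buffer, allocating a tracking slot on first use (freed
// indices are reused). Falls back to defaultBufferState_ when the handle index is out of range.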
1704 RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
1705 const RenderHandle handle, const GpuQueue& queue)
1706 {
1707     // NOTE: do not call this with a resource that is not dynamically tracked
1708 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
1709 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
1710 if (arrayIndex < gpuBufferDataIndices_.size()) {
1711 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
1712 uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
1713 if (dataIdx == INVALID_TRACK_IDX) {
1714 if (!gpuBufferAvailableIndices_.empty()) {
1715 dataIdx = gpuBufferAvailableIndices_.back();
1716 gpuBufferAvailableIndices_.pop_back();
1717 } else {
1718 dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
1719 gpuBufferTracking_.emplace_back();
1720 }
1721 gpuBufferDataIndices_[arrayIndex] = dataIdx;
1722
1723 gpuBufferTracking_[dataIdx].resource.handle = handle;
1724 gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
1725 }
1726 return gpuBufferTracking_[dataIdx];
1727 }
1728
1729 return defaultBufferState_;
1730 }
1731
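// Returns the tracked state of a dynamic image, allocating a tracking slot (and per-mip layout
// storage for resources with additional state) on first use. Falls back to defaultImageState_
// when the handle index is out of range.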
1732 RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
1733 const RenderHandle handle, const GpuQueue& queue)
1734 {
1735     // NOTE: do not call this with a resource that is not dynamically tracked
1736 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
1737 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
1738 if (arrayIndex < gpuImageDataIndices_.size()) {
1739         // NOTE: render pass attachments are always expected to be dynamic resources
1740 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
1741 uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
1742 if (dataIdx == INVALID_TRACK_IDX) {
1743 if (!gpuImageAvailableIndices_.empty()) {
1744 dataIdx = gpuImageAvailableIndices_.back();
1745 gpuImageAvailableIndices_.pop_back();
1746 } else {
1747 dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
1748 gpuImageTracking_.emplace_back();
1749 }
1750 gpuImageDataIndices_[arrayIndex] = dataIdx;
1751
1752 gpuImageTracking_[dataIdx].resource.handle = handle;
1753 gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
1754 if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
1755 (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
1756 gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
1757 }
1758 }
1759 #if (RENDER_VALIDATION_ENABLED == 1)
1760 if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
1761 (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
1762 PLUGIN_LOG_ONCE_W("dynamic_state_mips_issue_" + to_string(handle.id),
1763 "RENDER_VALIDATION: Additional mip states missing (handle:%" PRIx64 ")", handle.id);
1764 }
1765 #endif
1766 return gpuImageTracking_[dataIdx];
1767 }
1768
1769 PLUGIN_LOG_ONCE_W("render_graph_image_state_issues", "RenderGraph: Image tracking issue with handle count");
1770 return defaultImageState_;
1771 }
1772 RENDER_END_NAMESPACE()
1773