1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_graph.h"
17
18 #include <cinttypes>
19
20 #include <base/containers/array_view.h>
21 #include <base/containers/fixed_string.h>
22 #include <base/math/mathf.h>
23 #include <render/namespace.h>
24
25 #include "device/device.h"
26 #include "device/gpu_resource_cache.h"
27 #include "device/gpu_resource_handle_util.h"
28 #include "device/gpu_resource_manager.h"
29 #include "nodecontext/render_command_list.h"
30 #include "nodecontext/render_node_graph_node_store.h"
31 #include "util/log.h"
32
33 using namespace BASE_NS;
34
35 RENDER_BEGIN_NAMESPACE()
36 namespace {
37 constexpr uint32_t INVALID_TRACK_IDX { ~0u };
38
39 #if (RENDER_DEV_ENABLED == 1)
40 constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
41 constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
42 constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;
43
void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
45 {
46 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
47 switch (rc.type) {
48 case RenderCommandType::DRAW: {
49 PLUGIN_LOG_I("rc: Draw");
50 break;
51 }
52 case RenderCommandType::DRAW_INDIRECT: {
53 PLUGIN_LOG_I("rc: DrawIndirect");
54 break;
55 }
56 case RenderCommandType::DISPATCH: {
57 PLUGIN_LOG_I("rc: Dispatch");
58 break;
59 }
60 case RenderCommandType::DISPATCH_INDIRECT: {
61 PLUGIN_LOG_I("rc: DispatchIndirect");
62 break;
63 }
64 case RenderCommandType::BIND_PIPELINE: {
65 PLUGIN_LOG_I("rc: BindPipeline");
66 break;
67 }
68 case RenderCommandType::BEGIN_RENDER_PASS: {
69 PLUGIN_LOG_I("rc: BeginRenderPass");
70 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
71 const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
72 for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
73 const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
74 PLUGIN_LOG_I(" attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
75 }
76 PLUGIN_LOG_I(" subpass count: %u, subpass start idx: %u",
77 (uint32_t)beginRenderPass.renderPassDesc.subpassCount, beginRenderPass.subpassStartIndex);
78 }
79 break;
80 }
81 case RenderCommandType::NEXT_SUBPASS: {
82 PLUGIN_LOG_I("rc: NextSubpass");
83 break;
84 }
85 case RenderCommandType::END_RENDER_PASS: {
86 PLUGIN_LOG_I("rc: EndRenderPass");
87 break;
88 }
89 case RenderCommandType::BIND_VERTEX_BUFFERS: {
90 PLUGIN_LOG_I("rc: BindVertexBuffers");
91 break;
92 }
93 case RenderCommandType::BIND_INDEX_BUFFER: {
94 PLUGIN_LOG_I("rc: BindIndexBuffer");
95 break;
96 }
97 case RenderCommandType::COPY_BUFFER: {
98 PLUGIN_LOG_I("rc: CopyBuffer");
99 break;
100 }
101 case RenderCommandType::COPY_BUFFER_IMAGE: {
102 PLUGIN_LOG_I("rc: CopyBufferImage");
103 break;
104 }
105 case RenderCommandType::COPY_IMAGE: {
106 PLUGIN_LOG_I("rc: CopyImage");
107 break;
108 }
109 case RenderCommandType::BLIT_IMAGE: {
110 PLUGIN_LOG_I("rc: BlitImage");
111 break;
112 }
113 case RenderCommandType::BARRIER_POINT: {
114 PLUGIN_LOG_I("rc: BarrierPoint");
115 break;
116 }
117 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
118 PLUGIN_LOG_I("rc: BindDescriptorSets");
119 break;
120 }
121 case RenderCommandType::PUSH_CONSTANT: {
122 PLUGIN_LOG_I("rc: PushConstant");
123 break;
124 }
125 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
126 PLUGIN_LOG_I("rc: BuildAccelerationStructure");
127 break;
128 }
129 case RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES: {
130 PLUGIN_LOG_I("rc: CopyAccelerationStructureInstances");
131 break;
132 }
133 case RenderCommandType::CLEAR_COLOR_IMAGE: {
134 PLUGIN_LOG_I("rc: ClearColorImage");
135 break;
136 }
137
138 // dynamic states
139 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
140 PLUGIN_LOG_I("rc: DynamicStateViewport");
141 break;
142 }
143 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
144 PLUGIN_LOG_I("rc: DynamicStateScissor");
145 break;
146 }
147 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
148 PLUGIN_LOG_I("rc: DynamicStateLineWidth");
149 break;
150 }
151 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
152 PLUGIN_LOG_I("rc: DynamicStateDepthBias");
153 break;
154 }
155 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
156 PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
157 break;
158 }
159 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
160 PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
161 break;
162 }
163 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
164 PLUGIN_LOG_I("rc: DynamicStateStencil");
165 break;
166 }
167 case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
168 PLUGIN_LOG_I("rc: DynamicStateFragmentShadingRate");
169 break;
170 }
171 case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
172 PLUGIN_LOG_I("rc: ExecuteBackendFramePosition");
173 break;
174 }
175
176 case RenderCommandType::WRITE_TIMESTAMP: {
177 PLUGIN_LOG_I("rc: WriteTimestamp");
178 break;
179 }
180 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
181 PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
182 break;
183 }
184 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
185 PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
186 break;
187 }
188 case RenderCommandType::BEGIN_DEBUG_MARKER: {
189 PLUGIN_LOG_I("rc: BeginDebugMarker");
190 break;
191 }
192 case RenderCommandType::END_DEBUG_MARKER: {
193 PLUGIN_LOG_I("rc: EndDebugMarker");
194 break;
195 }
196 case RenderCommandType::UNDEFINED:
197 case RenderCommandType::COUNT: {
198 PLUGIN_ASSERT(false && "non-valid render command");
199 break;
200 }
201 }
202 }
203 }
204
void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
206 {
207 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
208 for (const auto& ref : combinedBarriers) {
209 const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
210 if (type == RenderHandleType::GPU_BUFFER) {
211 PLUGIN_LOG_I("barrier buffer :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
212 ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
213 ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags);
214 } else {
215 PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
216 PLUGIN_LOG_I("barrier image :: handle:0x%" PRIx64
217 " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
218 ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(),
219 ref.src.pipelineStageFlags, ref.dst.pipelineStageFlags, ref.src.optionalImageLayout,
220 ref.dst.optionalImageLayout);
221 }
222 }
223 }
224 }
225
void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
227 {
228 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
229 for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
230 const auto handle = rc.renderPassDesc.attachmentHandles[idx];
231 const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
232 const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
233 PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64
234 " name:%s, src_layout:%u dst_layout:%u (patched later)",
235 handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
236 }
237 }
238 }
239
void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
241 {
242 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
243 // NOTE: gpuHandle might be the same when generation index wraps around
244 // and when using shallow handles (shadow -> re-use normal -> shadow -> re-use normal etc)
245 const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
246 PLUGIN_LOG_I("image_state :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
247 resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
248 resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
249 RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
250 RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
251 // one could fetch and print vulkan handle here as well e.g.
252 // 1. const GpuImagePlatformDataVk& plat =
253 // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
254 // 3. PLUGIN_LOG_I("end_frame image :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
255 }
256 }
257 #endif // RENDER_DEV_ENABLED
258
259 constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
260 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
261 CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
262 CORE_ACCESS_MEMORY_WRITE_BIT;
263
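// Patches the stored final layout of a single attachment in an already recorded BeginRenderPass.
// RenderGraph::StoreFinalImageState() uses this to force swapchain images into
// CORE_IMAGE_LAYOUT_PRESENT_SRC when the last write happened inside a render pass, roughly:
//   PatchRenderPassFinalLayout(handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);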
void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
    RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
266 {
267 const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
268 for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
269 if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
270 beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
271 storeState.resource.imageLayout = imageLayout;
272 }
273 }
274 }
275
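// Stitches a render pass that is split over multiple render command lists (one subpass per list)
// into a single consistent description: load ops and initial layouts come from the first list,
// store ops and final layouts from the last one, and the combined subpass data is written back
// to every participating list so the backend records identical render pass information.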
void UpdateMultiRenderCommandListRenderPasses(Device& device, RenderGraph::MultiRenderPassStore& store)
277 {
278 const auto renderPassCount = (uint32_t)store.renderPasses.size();
279 PLUGIN_ASSERT(renderPassCount > 1);
280
281 RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
282 PLUGIN_ASSERT(firstRenderPass);
283 PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
284 const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
285 PLUGIN_ASSERT(lastRenderPass);
286
287 const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;
288
289 // take attachment loads from the first one, and stores from the last one
290 // take initial layouts from the first one, and final layouts from the last one (could take the next layout)
    // initially store the correct render pass description in the first render pass and then copy it to the others
    // resource states are copied from valid subpasses to the other render command lists' subpasses
293 for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
294 const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
295 const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
296 const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
297 for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
298 if (fromRpIdx != toRpIdx) {
299 auto& toRenderPass = *(store.renderPasses[toRpIdx]);
300 auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
301 for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
302 toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
303 toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
304 }
305 }
306 }
307 }
308
309 for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
310 firstRenderPass->renderPassDesc.attachments[idx].storeOp =
311 lastRenderPass->renderPassDesc.attachments[idx].storeOp;
312 firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
313 lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;
314
315 firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
316 lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
317 }
318
319 // copy subpasses to first and mark if merging subpasses
320 bool mergeSubpasses = false;
321 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
322 if ((idx < store.renderPasses.size()) && (idx < store.renderPasses[idx]->subpasses.size())) {
323 firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
324 if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
325 mergeSubpasses = true;
326 }
327 }
328 #if (RENDER_VALIDATION_ENABLED == 1)
329 if ((idx >= store.renderPasses.size()) || (idx >= store.renderPasses[idx]->subpasses.size())) {
330 PLUGIN_LOG_W("Invalid render pass subpass configuration for multi render pass");
331 }
332 #endif
333 }
    // NOTE: subpass merging is only used with the Vulkan backend at the moment
335 if (device.GetBackendType() != DeviceBackendType::VULKAN) {
336 mergeSubpasses = false;
337 }
338
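    // The merge step below walks the stitched subpasses from back to front: a subpass marked with
    // CORE_SUBPASS_MERGE_BIT is folded into the previous one (its description, layouts, and states
    // are copied over the previous subpass), the owning command list's subpassStartIndex is moved
    // back by one, and the final subpass count is reduced by the number of successful merges.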
339 uint32_t subpassCount = renderPassCount;
340 if (mergeSubpasses) {
341 PLUGIN_ASSERT(renderPassCount > 1U);
342 // merge from back to front
343 const uint32_t finalSubpass = renderPassCount - 1U;
344 uint32_t mergeCount = 0U;
345 for (uint32_t idx = finalSubpass; idx > 0U; --idx) {
346 if (firstRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
347 PLUGIN_ASSERT(idx > 0U);
348
349 uint32_t prevSubpassIdx = idx - 1U;
350 auto& currSubpass = firstRenderPass->subpasses[idx];
351 auto& prevSubpass = firstRenderPass->subpasses[prevSubpassIdx];
352 // cannot merge in these cases
353 if (currSubpass.inputAttachmentCount != prevSubpass.inputAttachmentCount) {
                    currSubpass.subpassFlags &= ~SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
355 #if (RENDER_VALIDATION_ENABLED == 1)
356 PLUGIN_LOG_W(
357 "RENDER_VALIDATION: Trying to merge subpasses with input attachments, undefined results");
358 #endif
359 }
360 if (prevSubpass.resolveAttachmentCount > currSubpass.resolveAttachmentCount) {
                    currSubpass.subpassFlags &= ~SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
362 #if (RENDER_VALIDATION_ENABLED == 1)
363 PLUGIN_LOG_W("RENDER_VALIDATION: Trying to merge subpasses with different resolve counts, "
364 "undefined results");
365 #endif
366 }
367 if ((currSubpass.subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) == 0) {
368 // merge failed -> continue
369 continue;
370 }
371
372 mergeCount++;
373 auto& currRenderPass = store.renderPasses[idx];
374 const auto& currSubpassResourceStates = currRenderPass->subpassResourceStates[idx];
375 currRenderPass->subpassStartIndex = currRenderPass->subpassStartIndex - 1U;
376 // can merge
377 currSubpass.subpassFlags |= SubpassFlagBits::CORE_SUBPASS_MERGE_BIT;
378
379 auto& prevRenderPass = store.renderPasses[prevSubpassIdx];
380 auto& prevSubpassResourceStates = prevRenderPass->subpassResourceStates[prevSubpassIdx];
381 // NOTE: at the moment copies everything from the current subpass
382 CloneData(&prevSubpass, sizeof(RenderPassSubpassDesc), &currSubpass, sizeof(RenderPassSubpassDesc));
383 // copy layouts and states from the current to previous
384 for (uint32_t resourceIdx = 0U; resourceIdx < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT;
385 ++resourceIdx) {
386 prevSubpassResourceStates.layouts[resourceIdx] = currSubpassResourceStates.layouts[resourceIdx];
387 prevSubpassResourceStates.states[resourceIdx] = currSubpassResourceStates.states[resourceIdx];
388 }
389 }
390 }
391
392 // new minimal subpass count
393 subpassCount = subpassCount - mergeCount;
394 firstRenderPass->renderPassDesc.subpassCount = subpassCount;
395 firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), subpassCount };
396 // update subpass start indices
397 uint32_t subpassStartIndex = 0;
398 for (uint32_t idx = 1U; idx < renderPassCount; ++idx) {
399 auto& currRenderPass = store.renderPasses[idx];
400 if (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT) {
401 currRenderPass->subpassStartIndex = subpassStartIndex;
402 } else {
403 subpassStartIndex++;
404 }
405 }
406 }
407
408 // copy from first to following render passes
409 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        // subpass start index is the only changing variable
411 auto& currRenderPass = store.renderPasses[idx];
412 const uint32_t subpassStartIndex = currRenderPass->subpassStartIndex;
413 currRenderPass->renderPassDesc = firstRenderPass->renderPassDesc;
414 // advance subpass start index if not merging
415 if (mergeSubpasses &&
416 ((idx < currRenderPass->subpasses.size()) &&
417 (currRenderPass->subpasses[idx].subpassFlags & SubpassFlagBits::CORE_SUBPASS_MERGE_BIT))) {
418 // NOTE: subpassResourceStates are copied in this case
419 currRenderPass->subpassResourceStates[subpassStartIndex] =
420 firstRenderPass->subpassResourceStates[subpassStartIndex];
421 }
422 currRenderPass->subpassStartIndex = subpassStartIndex;
423 // copy all subpasses and input resource states
424 currRenderPass->subpasses = firstRenderPass->subpasses;
425 currRenderPass->inputResourceStates = firstRenderPass->inputResourceStates;
        // image layouts need to match
427 currRenderPass->imageLayouts = firstRenderPass->imageLayouts;
428 // NOTE: subpassResourceStates are only copied when doing merging
429 }
430 }
431
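// The helpers below build the two halves of a CommandBarrier: the "src" part describes how the
// resource was last used (tracked state) and the "dst" part how the upcoming command will use it.
// CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT is OR'd into the source stage mask and
// CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT into the destination stage mask so neither mask is empty.
// Illustrative combination (CommandBarrier aggregates the handle, src/dst barriers, and queues):
//   combinedBarriers.push_back({ handle, GetSrcImageBarrier(prevState, prevRes), prevState.gpuQueue,
//       GetDstImageBarrier(newState, newRes), newState.gpuQueue });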
ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
433 {
434 return {
435 state.accessFlags,
436 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
437 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
438 res.byteOffset,
439 res.byteSize,
440 };
441 }
442
ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
444 {
445 return {
446 state.accessFlags,
447 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
448 res.imageLayout,
449 0,
450 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
451 };
452 }
453
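// Mip-aware variant: when either side targets a specific mip level, the barrier is narrowed to a
// single level (dst.mip if it is a valid tracked level, otherwise src.mip with all levels), and the
// source layout is read from the per-mip layout array tracked in RenderGraphAdditionalImageState.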
ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
456 {
457 uint32_t mipLevel = 0U;
458 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
459 ImageLayout srcImageLayout = src.imageLayout;
460 if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
461 (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
462 if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
463 mipLevel = dst.mip;
464 mipCount = 1U;
465 } else {
466 mipLevel = src.mip;
467 // all mip levels
468 }
469 PLUGIN_ASSERT(additionalImageState.layouts);
470 srcImageLayout = additionalImageState.layouts[mipLevel];
471 }
472 return {
473 state.accessFlags,
474 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
475 srcImageLayout,
476 0,
477 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
478 { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
479 };
480 }
481
ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
483 {
484 return {
485 state.accessFlags,
486 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
487 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
488 res.byteOffset,
489 res.byteSize,
490 };
491 }
492
ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
494 {
495 return {
496 state.accessFlags,
497 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
498 res.imageLayout,
499 0,
500 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
501 };
502 }
503
ResourceBarrier GetDstImageBarrierMips(
    const GpuResourceState& state, const BindableImage& src, const BindableImage& dst)
506 {
507 uint32_t mipLevel = 0U;
508 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
509 ImageLayout dstImageLayout = dst.imageLayout;
510 if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
511 (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
512 if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
513 mipLevel = dst.mip;
514 mipCount = 1U;
515 } else {
516 mipLevel = src.mip;
517 // all mip levels
518 }
519 }
520 return {
521 state.accessFlags,
522 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
523 dstImageLayout,
524 0,
525 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
526 { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
527 };
528 }
529
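// Images created with additional state tracking (RenderHandleUtil::IsDynamicAdditionalStateResource)
// carry a per-mip layout array of MAX_MIP_STATE_COUNT entries; this updates either the single
// referenced mip level or, when all levels are addressed, every entry to the new layout.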
void ModifyAdditionalImageState(
    const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
532 {
533 #if (RENDER_VALIDATION_ENABLED == 1)
534 // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
535 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
536 #endif
537 if (additionalStateRef.layouts) {
538 if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
539 (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
540 additionalStateRef.layouts[res.mip] = res.imageLayout;
541 } else {
542 // set layout for all mips
543 for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
544 additionalStateRef.layouts[idx] = res.imageLayout;
545 }
546 }
547 } else {
548 #if (RENDER_VALIDATION_ENABLED == 1)
549 PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
550 #endif
551 }
552 }
553
CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
    const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
556 {
557 return {
558 handle,
559
560 ResourceBarrier {
561 0,
562 PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
563 srcImageLayout,
564 0,
565 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
566 ImageSubresourceRange {},
567 },
568 srcGpuQueue,
569
570 ResourceBarrier {
571 0,
572 PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
573 dstImageLayout,
574 0,
575 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
576 ImageSubresourceRange {},
577 },
578 dstGpuQueue,
579 };
580 }
581
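// Queue ownership transfers are patched in afterwards: the same CommandBarrier is appended both to
// the releasing node's last barrier point (GPU_QUEUE_TRANSFER_RELEASE) and to the acquiring node's
// first barrier point (GPU_QUEUE_TRANSFER_ACQUIRE), and both command lists are flagged so the
// backend knows valid release/acquire barriers exist.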
void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
    array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
584 {
585 for (const auto& transferRef : currNodeGpuResourceTransfers) {
586 PLUGIN_ASSERT(transferRef.acquireNodeIdx < (uint32_t)frameRenderNodeContextData.size());
587 if (transferRef.acquireNodeIdx >= frameRenderNodeContextData.size()) {
588 // skip
589 continue;
590 }
591
592 auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
593 const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
594 GpuQueue releaseGpuQueue = acquireGpuQueue;
595
596 if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
597 auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
598 releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
599 }
600
601 const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
602 acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);
603
604 // release ownership (NOTE: not done for previous frame)
605 if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
606 auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
607 const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
608 const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
609 PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
610 const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
611 PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
612
613 const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
614 releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });
615
616 // inform that we are patching valid barriers
617 releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
618 }
619 // acquire ownership
620 {
621 const uint32_t rcIndex = 0;
622 const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
623 PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
624 const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
625 PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
626
627 const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
628 acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });
629
630 // inform that we are patching valid barriers
631 acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
632 }
633 }
634 }
635
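// A barrier is only needed for dynamically tracked resources, and only if a custom barrier for the
// same handle was not already handled in this barrier point.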
bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
    const uint32_t customBarrierCount, const RenderHandle handle)
638 {
639 bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
640 if ((customBarrierCount > 0) && needsBarrier) {
641 needsBarrier = (handledCustomBarriers.count(handle) == 0);
642 }
643 return needsBarrier;
644 }
645 } // namespace
646
RenderGraph::RenderGraph(Device& device)
648 : device_(device), gpuResourceMgr_((GpuResourceManager&)device.GetGpuResourceManager())
649 {}
650
void RenderGraph::BeginFrame()
652 {
653 stateCache_.multiRenderPassStore.renderPasses.clear();
654 stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
655 stateCache_.multiRenderPassStore.supportOpen = false;
656 stateCache_.nodeCounter = 0u;
657 stateCache_.checkForBackbufferDependency = false;
658 stateCache_.usesSwapchainImage = false;
659 }
660
void RenderGraph::ProcessRenderNodeGraph(
    const bool checkBackbufferDependancy, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
663 {
664 stateCache_.checkForBackbufferDependency = checkBackbufferDependancy;
665
666 // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
667 // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
668 // handles:
669 // gpu images in descriptor sets, render passes, blits, and custom barriers
670 // gpu buffers in descriptor sets, and custom barriers
671
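    // Tracking bookkeeping: gpuBufferDataIndices_ / gpuImageDataIndices_ map a handle's index part
    // to a slot in gpuBufferTracking_ / gpuImageTracking_ (INVALID_TRACK_IDX when untracked), and
    // destroyed resources release their slot into the corresponding available-index list for reuse.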
672 {
673 // remove resources that will not be tracked anymore and release available slots
674 const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
675 for (const auto& handle : stateResetData.resources) {
676 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
677 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
678 if ((handleType == RenderHandleType::GPU_IMAGE) &&
679 (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
680 if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
681 PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
682 gpuImageTracking_[dataIdx] = {}; // reset
683 gpuImageAvailableIndices_.push_back(dataIdx);
684 }
685 gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
686 } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
687 if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
688 PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
689 gpuBufferTracking_[dataIdx] = {}; // reset
690 gpuBufferAvailableIndices_.push_back(dataIdx);
691 }
692 gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
693 }
694 }
695 }
696
697 gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
698 gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);
699
700 #if (RENDER_DEV_ENABLED == 1)
701 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
702 CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
703 static uint64_t debugFrame = 0;
704 debugFrame++;
705 PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
706 }
707 #endif
708
    // some resources need their frame state stored as undefined (i.e. reset on frame boundaries)
710 ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);
711
712 // store final state for next frame
713 StoreFinalBufferState();
714 StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
715 }
716
RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
718 {
719 return swapchainStates_;
720 }
721
void RenderGraph::ProcessRenderNodeGraphNodeStores(
    const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
724 {
725 for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
726 PLUGIN_ASSERT(graphStore);
727 if (!graphStore) {
728 continue;
729 }
730
731 for (uint32_t nodeIdx = 0; nodeIdx < (uint32_t)graphStore->renderNodeContextData.size(); ++nodeIdx) {
732 auto& ref = graphStore->renderNodeContextData[nodeIdx];
733 ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
734 stateCache.usesSwapchainImage = false; // reset
735
736 #if (RENDER_DEV_ENABLED == 1)
737 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
738 PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
739 }
740 #endif
741
742 if (stateCache.multiRenderPassStore.supportOpen && (stateCache.multiRenderPassStore.renderPasses.empty())) {
743 PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
744 // NOTE: add more error handling and invalidate render command lists
745 }
746 stateCache.multiRenderPassStore.supportOpen = ref.renderCommandList->HasMultiRenderCommandListSubpasses();
747 array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
748 // go through commands that affect or need transitions and barriers
749 ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);
750
751 // needs backbuffer/swapchain wait
752 if (stateCache.usesSwapchainImage) {
753 ref.submitInfo.waitForSwapchainAcquireSignal = true;
754 }
755
756 // patch gpu resource queue transfers
757 if (!currNodeGpuResourceTransfers_.empty()) {
758 PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
759 // clear for next use
760 currNodeGpuResourceTransfers_.clear();
761 }
762
763 stateCache_.nodeCounter++;
764 }
765 }
766 }
767
void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
    const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
770 {
771 for (uint32_t listIdx = 0; listIdx < (uint32_t)cmdListRef.size(); ++listIdx) {
772 auto& cmdRef = cmdListRef[listIdx];
773
774 #if (RENDER_DEV_ENABLED == 1)
775 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
776 DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
777 }
778 #endif
779
780 // most of the commands are handled within BarrierPoint
781 switch (cmdRef.type) {
782 case RenderCommandType::BARRIER_POINT:
783 RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
784 break;
785
786 case RenderCommandType::BEGIN_RENDER_PASS:
787 RenderCommand(
788 nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
789 break;
790
791 case RenderCommandType::END_RENDER_PASS:
792 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
793 break;
794
795 case RenderCommandType::NEXT_SUBPASS:
796 case RenderCommandType::DRAW:
797 case RenderCommandType::DRAW_INDIRECT:
798 case RenderCommandType::DISPATCH:
799 case RenderCommandType::DISPATCH_INDIRECT:
800 case RenderCommandType::BIND_PIPELINE:
801 case RenderCommandType::BIND_VERTEX_BUFFERS:
802 case RenderCommandType::BIND_INDEX_BUFFER:
803 case RenderCommandType::COPY_BUFFER:
804 case RenderCommandType::COPY_BUFFER_IMAGE:
805 case RenderCommandType::COPY_IMAGE:
806 case RenderCommandType::BIND_DESCRIPTOR_SETS:
807 case RenderCommandType::PUSH_CONSTANT:
808 case RenderCommandType::BLIT_IMAGE:
809 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
810 case RenderCommandType::CLEAR_COLOR_IMAGE:
811 case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
812 case RenderCommandType::DYNAMIC_STATE_SCISSOR:
813 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
814 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
815 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
816 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
817 case RenderCommandType::DYNAMIC_STATE_STENCIL:
818 case RenderCommandType::WRITE_TIMESTAMP:
819 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
820 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
821 case RenderCommandType::UNDEFINED:
822 default: {
823 // nop
824 break;
825 }
826 }
827 } // end command for
828 }
829
void RenderGraph::StoreFinalBufferState()
831 {
832 for (auto& ref : gpuBufferTracking_) {
833 if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
834 ref = {};
835 continue;
836 }
837 // NOTE: we cannot soft reset here
838 // if we do so some buffer usage might overlap in the next frame
839 if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            // reset, but we do not reset the handle, because the gpuBufferTracking_ element is not removed
841 const RenderHandle handle = ref.resource.handle;
842 ref = {};
843 ref.resource.handle = handle;
844 }
845
846 // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
847 // frames)
848 ref.prevRenderNodeIndex = { ~0u };
849 }
850 }
851
void RenderGraph::StoreFinalImageState()
853 {
854 swapchainStates_ = {}; // reset
855
856 #if (RENDER_DEV_ENABLED == 1)
857 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
858 PLUGIN_LOG_I("end_frame image_state:");
859 }
860 #endif
861 for (auto& ref : gpuImageTracking_) {
862 // if resource is not dynamic, we do not track and care
863 if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
864 ref = {};
865 continue;
866 }
867 // handle automatic presentation layout
868 if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
869 if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
870 RenderCommandBeginRenderPass& beginRenderPass =
871 *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
872 PatchRenderPassFinalLayout(
873 ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
874 }
875 // NOTE: currently we handle automatic presentation layout in vulkan backend if not in render pass
876 // store final state for backbuffer
            // currently we only store swapchains if they are really in use in this frame
878 const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
879 if (flags != 0) {
880 swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
881 }
882 }
883 #if (RENDER_DEV_ENABLED == 1)
884 // print before reset for next frame
885 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
886 DebugPrintImageState(gpuResourceMgr_, ref);
887 }
888 #endif
889 // shallow resources are not tracked
        // they are always in an undefined state at the beginning of the frame
891 if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
892 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
893 // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
894 const RenderHandle handle = ref.resource.handle;
895 ref = {};
896 ref.resource.handle = handle;
897 if (addMips) {
898 PLUGIN_ASSERT(!ref.additionalState.layouts);
899 ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
900 }
901 }
902 // NOTE: render pass compatibility hashing with stages and access flags
        // creates quite a few new graphics pipelines in the first few frames.
904 // else branch with soft reset here could prevent access flags from previous frame.
905 // To get this to work one could get the flags from the end of the frame to the begin as well.
906
907 // need to reset per frame variables for all images (so we do not try to patch from previous frames)
908 ref.prevRc = {};
909 ref.prevRenderNodeIndex = { ~0u };
910 }
911 }
912
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
915 {
916 // update layouts for attachments to gpu image state
917 BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };
918
919 PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);
920
921 const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
922 if (hasRenderPassDependency) { // stitch render pass subpasses
923 BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
924 }
925
926 const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();
927
928 auto finalImageLayouts =
929 array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));
930
931 BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);
932
933 for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
934 const auto& subpassRef = rc.subpasses[subpassIdx];
935 const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];
936
937 BeginRenderPassUpdateSubpassImageStates(
938 array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
939 subpassResourceStatesRef, finalImageLayouts);
940
941 BeginRenderPassUpdateSubpassImageStates(
942 array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
943 subpassResourceStatesRef, finalImageLayouts);
944
945 BeginRenderPassUpdateSubpassImageStates(
946 array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
947 subpassResourceStatesRef, finalImageLayouts);
948
949 if (subpassRef.depthAttachmentCount == 1u) {
950 BeginRenderPassUpdateSubpassImageStates(
951 array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
952 subpassResourceStatesRef, finalImageLayouts);
953 if (subpassRef.depthResolveAttachmentCount == 1) {
954 BeginRenderPassUpdateSubpassImageStates(
955 array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
956 rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
957 }
958 }
959 if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
960 BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
961 subpassRef.fragmentShadingRateAttachmentCount),
962 rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts);
963 }
964 }
965
966 if (hasRenderPassDependency) { // stitch render pass subpasses
967 if (rc.subpassStartIndex > 0) {
968 // stitched to behave as a nextSubpass() and not beginRenderPass()
969 rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
970 }
971 const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
972 if (finalSubpass) {
973 UpdateMultiRenderCommandListRenderPasses(device_, stateCache.multiRenderPassStore);
974 // multiRenderPassStore cleared in EndRenderPass
975 }
976 }
977 #if (RENDER_DEV_ENABLED == 1)
978 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
979 DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
980 }
981 #endif
982 }
983
void RenderGraph::BeginRenderPassHandleDependency(
    BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
986 {
987 params.stateCache.multiRenderPassStore.renderPasses.push_back(¶ms.rc);
988 // store the first begin render pass
989 params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
990 params.stateCache.multiRenderPassStore.renderPasses[0] };
991
992 if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
993 #ifndef NDEBUG
994 // barrier point must be previous command
995 PLUGIN_ASSERT(commandListCommandIndex >= 1);
996 const uint32_t prevCommandIndex = commandListCommandIndex - 1;
997 const RenderCommandWithType& barrierPointCmdRef =
998 nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
999 PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
1000 PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));
1001 #endif
1002 params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
1003 }
1004 }
1005
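// For every attachment: pulls the currently tracked image state, writes it into the render pass
// initial layouts (when automatic layout changes are enabled), demotes LOAD_OP_LOAD to
// LOAD_OP_DONT_CARE if the tracked layout is still UNDEFINED, records the render pass as the
// image's previous command so the final layout can be patched later, and flags swapchain usage.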
void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
    array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
1008 {
1009 auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
1010 const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
1011 auto& attachments = params.rc.renderPassDesc.attachments;
1012 auto& attachmentInputResourceStates = params.rc.inputResourceStates;
1013
1014 for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
1015 const RenderHandle handle = attachmentHandles[attachmentIdx];
1016 // NOTE: invalidate invalid handle commands already in render command list
1017 if (!RenderHandleUtil::IsGpuImage(handle)) {
1018 #ifdef _DEBUG
1019 PLUGIN_LOG_E("invalid handle in render node graph");
1020 #endif
1021 continue;
1022 }
1023 auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
1024 ImageLayout imgLayout = stateRef.resource.imageLayout;
1025
1026 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
1027 // image layout is undefined if automatic barriers have been disabled
1028 if (params.rc.enableAutomaticLayoutChanges) {
1029 const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
1030 if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
1031 if (stateRef.additionalState.layouts) {
1032 imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
1033 } else {
1034 #if (RENDER_VALIDATION_ENABLED == 1)
1035 PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
1036 #endif
1037 }
1038 }
1039
1040 initialImageLayouts[attachmentIdx] = imgLayout;
1041 }
1042 // undefined layout with load_op_load -> we modify to dont_care (and remove validation warning)
1043 if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
1044 (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
1045 // dont care (user needs to be sure what is wanted, i.e. in first frame one should clear)
1046 attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
1047 }
1048 finalImageLayouts[attachmentIdx] = imgLayout;
1049 attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
1050 attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;
1051
1052 // store render pass for final layout patching
1053 stateRef.prevRc = params.rpForCmdRef;
1054 stateRef.prevRenderNodeIndex = renderNodeIndex;
1055
1056 // flag for backbuffer use
1057 if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
1058 params.stateCache.usesSwapchainImage = true;
1059 }
1060 }
1061 }
1062
void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attatchmentIndices,
    const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
    array_view<ImageLayout> finalImageLayouts)
1066 {
1067 for (const uint32_t attachmentIndex : attatchmentIndices) {
1068 // NOTE: handle invalid commands already in render command list and invalidate draws etc.
1069 PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
1070 const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
1071 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
1072 const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
1073 const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
1074 // NOTE: we should support non dynamicity and GENERAL
1075
1076 finalImageLayouts[attachmentIndex] = refImgLayout;
1077 auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
1078 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
1079
1080 ref.state = refState;
1081 ref.resource.handle = handle;
1082 ref.resource.imageLayout = refImgLayout;
1083 if (addMips) {
1084 const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
1085 const BindableImage image {
1086 handle,
1087 attachmentDesc.mipLevel,
1088 attachmentDesc.layer,
1089 refImgLayout,
1090 RenderHandle {},
1091 };
1092 ModifyAdditionalImageState(image, ref.additionalState);
1093 }
1094 }
1095 }
1096
void RenderGraph::RenderCommand(RenderCommandEndRenderPass& rc, StateCache& stateCache)
1098 {
1099 const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
1100 if (hasRenderPassDependency) {
1101 const bool finalSubpass = (rc.subpassCount == (uint32_t)stateCache.multiRenderPassStore.renderPasses.size());
1102 if (finalSubpass) {
1103 if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
1104 PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
1105 // NOTE: add more error handling and invalidate render command lists
1106 }
1107 rc.endType = RenderPassEndType::END_RENDER_PASS;
1108 stateCache.multiRenderPassStore.renderPasses.clear();
1109 stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
1110 stateCache.multiRenderPassStore.supportOpen = false;
1111 } else {
1112 rc.endType = RenderPassEndType::END_SUBPASS;
1113 }
1114 }
1115 }
1116
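// A barrier point is recorded in the command list right before the command that needs the
// transitions. All required barriers (custom, vertex/index, indirect, per-command, and descriptor
// set resources) are gathered into parameters.combinedBarriers and attached to the barrier point
// index in the RenderBarrierList; for stitched multi-command-list render passes they are routed
// to the first render pass' barrier list instead.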
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
1119 {
1120 // go through required descriptors for current upcoming event
1121 const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
1122 const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
1123 const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
1124 const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;
1125
1126 parameterCachePools_.combinedBarriers.clear();
1127 parameterCachePools_.handledCustomBarriers.clear();
1128 ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
1129 rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
1130 nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
1131 // first check custom barriers
1132 if (parameters.customBarrierCount > 0) {
1133 HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
1134 }
1135 // then vertex / index buffer barriers in the barrier point before render pass
1136 if (parameters.vertexInputBarrierCount > 0) {
1137 PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
1138 HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
1139 nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
1140 }
1141 if (parameters.indirectBufferBarrierCount > 0U) {
1142 PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
1143 HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
1144 nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
1145 }
1146
1147 // in barrier point the next render command is known for which the barrier is needed
1148 if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
1149 HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
1150 } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
1151 HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
1152 } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
1153 HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
1154 } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
1155 HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
1156 } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
1157 HandleCopyBufferImage(
1158 parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image descriptor sets
1159 } else if (rc.renderCommandType == RenderCommandType::BUILD_ACCELERATION_STRUCTURE) {
1160 HandleBuildAccelerationStructure(parameters, commandListCommandIndex, cmdListRef);
1161 } else if (rc.renderCommandType == RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES) {
1162 HandleCopyAccelerationStructureInstances(parameters, commandListCommandIndex, cmdListRef);
1163 } else {
1164 if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
1165 HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
1166 } else if (rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS) {
1167 // additional render pass attachment barriers
1168 HandleRenderPassImage(parameters, commandListCommandIndex, cmdListRef);
1169 }
1170 const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
1171 const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
1172 const uint32_t descriptorSetHandleMaxIndex =
1173 Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
1174 const auto descriptorSetHandlesForBarriers =
1175 array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
1176 allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
1177 HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
1178 }
1179
1180 if (!parameters.combinedBarriers.empty()) {
1181 // use first render pass barrier point with following subpasses
1182 // firstRenderPassBarrierPoint is null for the first subpass
        const bool renderPassHasDependency = stateCache.multiRenderPassStore.supportOpen;
        if (renderPassHasDependency && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
1185 PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
1186 stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
1187 rc.barrierPointIndex, parameters.combinedBarriers);
1188 } else {
1189 nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
1190 }
1191 }
1192 #if (RENDER_DEV_ENABLED == 1)
1193 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
1194 DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
1195 }
1196 #endif
1197 }
1198
inline void RenderGraph::UpdateBufferResourceState(
    RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
1201 {
1202 stateRef.resource.handle = cb.resourceHandle;
1203 stateRef.state.shaderStageFlags = 0;
1204 stateRef.state.accessFlags = cb.dst.accessFlags;
1205 stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
1206 stateRef.state.gpuQueue = params.gpuQueue;
1207 stateRef.prevRenderNodeIndex = params.renderNodeIndex;
1208 }
1209
inline void RenderGraph::UpdateImageResourceState(
    RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
1212 {
1213 stateRef.resource.handle = cb.resourceHandle;
1214 stateRef.state.shaderStageFlags = 0;
1215 stateRef.state.accessFlags = cb.dst.accessFlags;
1216 stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
1217 stateRef.state.gpuQueue = params.gpuQueue;
1218 stateRef.prevRc = params.rcWithType;
1219 stateRef.prevRenderNodeIndex = params.renderNodeIndex;
1220 }
1221
void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const CommandBarrier>& customBarrierListRef)
1224 {
1225 params.handledCustomBarriers.reserve(params.customBarrierCount);
1226 PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
1227 for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
1228 end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
1229 begin != end; ++begin) {
1230 // add a copy and modify if needed
1231 auto& cb = params.combinedBarriers.emplace_back(*begin);
1232
1233 // NOTE: undefined type is for non-resource memory/pipeline barriers
1234 const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
1235 const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
1236 PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
1237 (type == RenderHandleType::GPU_IMAGE));
1238 if (type == RenderHandleType::GPU_BUFFER) {
1239 if (isDynamicTrack) {
1240 auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
1241 UpdateBufferResourceState(stateRef, params, cb);
1242 }
1243 params.handledCustomBarriers[cb.resourceHandle] = 0;
1244 } else if (type == RenderHandleType::GPU_IMAGE) {
1245 if (isDynamicTrack) {
1246 const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
1247 auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
1248 if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
1249 uint32_t mipLevel = 0U;
1250 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
1251 ImageLayout srcImageLayout = stateRef.resource.imageLayout;
1252 if (isAddMips) {
1253 const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
1254 const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
1255 if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
1256 (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
1257 if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
1258 mipLevel = dstMip;
1259 mipCount = 1U;
1260 } else {
1261 mipLevel = srcMip;
1262 // all mip levels
1263 }
1264 if (stateRef.additionalState.layouts) {
1265 srcImageLayout = stateRef.additionalState.layouts[mipLevel];
1266 } else {
1267 #if (RENDER_VALIDATION_ENABLED == 1)
1268 PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
1269 #endif
1270 }
1271 }
1272 }
1273 cb.src.accessFlags = stateRef.state.accessFlags;
1274 cb.src.pipelineStageFlags =
1275 stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1276 cb.src.optionalImageLayout = srcImageLayout;
1277 cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
1278 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
1279 }
1280 UpdateImageResourceState(stateRef, params, cb);
1281 stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
1282 if (isAddMips) {
1283 const BindableImage image {
1284 cb.resourceHandle,
1285 cb.dst.optionalImageSubresourceRange.baseMipLevel,
1286 cb.dst.optionalImageSubresourceRange.baseArrayLayer,
1287 cb.dst.optionalImageLayout,
1288 RenderHandle {},
1289 };
1290 ModifyAdditionalImageState(image, stateRef.additionalState);
1291 }
1292 }
1293 params.handledCustomBarriers[cb.resourceHandle] = 0;
1294 }
1295 }
1296 }
1297
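// Creates vertex/index read barriers (at the vertex input stage) for the vertex and index buffers
// recorded for this barrier point.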
void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size()) {
            const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
            const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
            UpdateStateAndCreateBarriersGpuBuffer(
                resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
        }
    }
}

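// Creates indirect-argument read barriers for indirect buffers used inside a render pass, unless
// a custom barrier already handled the buffer.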
void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)indirectBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)indirectBufferBarrierListRef.size()) {
            const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
            const bool needsArgsBarrier =
                CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
            if (needsArgsBarrier) {
                const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                    CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
                UpdateStateAndCreateBarriersGpuBuffer(
                    resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
            }
        }
    }
}

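// Inspects the following BEGIN_RENDER_PASS command and adds attachment barriers: depth images are
// moved to depth/stencil attachment write and other attachments to color attachment write, unless
// a custom barrier already covers the attachment.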
void RenderGraph::HandleRenderPassImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BEGIN_RENDER_PASS);

    const RenderCommandBeginRenderPass& nextRc = *static_cast<RenderCommandBeginRenderPass*>(nextCmdRef.rc);
    // check for all attachments
    const RenderPassDesc& rpDesc = nextRc.renderPassDesc;
    for (uint32_t attachIdx = 0U; attachIdx < rpDesc.attachmentCount; ++attachIdx) {
        const RenderHandle handle = rpDesc.attachmentHandles[attachIdx];
        const bool needsBarrier = CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, handle);
        if (needsBarrier) {
            const bool depthImage = RenderHandleUtil::IsDepthImage(handle);
            BindableImage bRes = {};
            bRes.handle = handle;
            bRes.imageLayout = depthImage ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
                                          : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
            const AccessFlags accessFlags =
                depthImage ? CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT : CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
            const PipelineStageFlags pipelineStageFlags = CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState { 0, accessFlags, pipelineStageFlags, params.gpuQueue }, bRes, params.rcWithType,
                params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

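// Inspects the following CLEAR_COLOR_IMAGE command and adds a transfer-write barrier for the
// cleared image when needed.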
void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);

    const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);

    const bool needsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
    if (needsBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.handle;
        bRes.imageLayout = nextRc.imageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

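// Inspects the following BLIT_IMAGE command and adds transfer-read/transfer-write barriers for the
// source and destination images when needed.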
void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);

    const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.srcHandle;
        bRes.imageLayout = nextRc.srcImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.dstHandle;
        bRes.imageLayout = nextRc.dstImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

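// Inspects the following COPY_BUFFER command and adds transfer-read/transfer-write barriers for
// the source and destination buffer ranges when needed.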
void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);

    const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

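// Handles both COPY_BUFFER_IMAGE and COPY_IMAGE: resolves source/destination handles and
// subresources and adds transfer-read/transfer-write barriers for buffers or images as needed.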
void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
        (nextCmdRef.type == RenderCommandType::COPY_IMAGE));

    // NOTE: two different command types supported
    RenderHandle srcHandle;
    RenderHandle dstHandle;
    ImageSubresourceLayers srcImgLayers;
    ImageSubresourceLayers dstImgLayers;
    if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
        const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
        PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
        dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
    } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
        const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.imageCopy.srcSubresource;
        dstImgLayers = nextRc.imageCopy.dstSubresource;
    }

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
    if (needsSrcBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = srcHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = srcHandle;
            bRes.mip = srcImgLayers.mipLevel;
            bRes.layer = srcImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
    if (needsDstBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = dstHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = dstHandle;
            bRes.mip = dstImgLayers.mipLevel;
            bRes.layer = dstImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

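// Inspects the following BUILD_ACCELERATION_STRUCTURE command and adds barriers for the instance
// buffers and the source/destination acceleration structure buffers used by the build.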
void RenderGraph::HandleBuildAccelerationStructure(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BUILD_ACCELERATION_STRUCTURE);

    const RenderCommandBuildAccelerationStructure& nextRc =
        *static_cast<RenderCommandBuildAccelerationStructure*>(nextCmdRef.rc);

    for (const auto& instancesRef : nextRc.instancesView) {
        // usually the bottom level acceleration structure, whose build needs to be finished first
        const RenderHandle handle = instancesRef.data.handle;
        const bool needsBarrier = CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, handle);
        if (needsBarrier) {
            const BindableBuffer bRes = { handle, 0U, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_READ_BIT,
                    CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const auto& geometry = nextRc.geometry;

    // NOTE: mostly empty at the moment
    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, geometry.srcAccelerationStructure);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { geometry.srcAccelerationStructure, 0U,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_READ_BIT,
                CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, geometry.dstAccelerationStructure);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { geometry.dstAccelerationStructure, 0U,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT,
                CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

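// Inspects the following COPY_ACCELERATION_STRUCTURE_INSTANCES command and adds read barriers for
// the referenced acceleration structures; the destination itself is written on the CPU.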
void RenderGraph::HandleCopyAccelerationStructureInstances(ParameterCache& params,
    const uint32_t& commandListCommandIndex, const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_ACCELERATION_STRUCTURE_INSTANCES);

    const RenderCommandCopyAccelerationStructureInstances& nextRc =
        *static_cast<RenderCommandCopyAccelerationStructureInstances*>(nextCmdRef.rc);

    // NOTE: nextRc.destination.handle will be copied on CPU, no barriers needed

    for (const auto& instancesRef : nextRc.instancesView) {
        const RenderHandle handle = instancesRef.accelerationStructure;
        const bool needsSrcBarrier =
            CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, handle);
        if (needsSrcBarrier) {
            const BindableBuffer bRes = { handle, 0U, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState { 0, CORE_ACCESS_ACCELERATION_STRUCTURE_READ_BIT,
                    CORE_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, params.gpuQueue },
                bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

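// Inspects the following DISPATCH_INDIRECT command and adds an indirect-argument read barrier for
// the argument buffer when needed.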
void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);

    const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);

    const bool needsArgsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
    if (needsArgsBarrier) {
        const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
                CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
            bRes, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

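// Walks the descriptor sets bound for the current command and creates barriers for the buffers and
// images they reference; global descriptor sets without dynamic barrier resources are skipped.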
void RenderGraph::HandleDescriptorSets(ParameterCache& params,
    const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
    const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
{
    for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
        if (RenderHandleUtil::GetHandleType(descriptorSetHandle) != RenderHandleType::DESCRIPTOR_SET) {
            continue;
        }

        // NOTE: with global descriptor sets the render command list could not know whether the set
        // has dynamic resources
        const uint32_t additionalData = RenderHandleUtil::GetAdditionalData(descriptorSetHandle);
        if (additionalData & NodeContextDescriptorSetManager::GLOBAL_DESCRIPTOR_BIT) {
            if (!nodeDescriptorSetMgrRef.HasDynamicBarrierResources(descriptorSetHandle)) {
                continue;
            }
        }

        const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
        const auto& buffers = bindingResources.buffers;
        const auto& images = bindingResources.images;
        for (const auto& refBuf : buffers) {
            const auto& ref = refBuf.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // index 0 is the binding itself; from index 1 onwards the array offset is used
                const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1].desc;
                if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ref.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
                }
            }
        }
        for (const auto& refImg : images) {
            const auto& ref = refImg.desc;
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array bindings which are bound from the first index; they also have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // index 0 is the binding itself; from index 1 onwards the array offset is used
                const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1].desc;
                if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount,
                    bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
                }
            }
        }
    } // end for
}

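// Updates the tracked state of a dynamically tracked image and emits a barrier (or a queue
// ownership transfer entry) when the layout changes or a write access is involved; input
// attachment reads are left to the render pass handling.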
void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
    const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
        return;
    }

    auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
    // NOTE: we previously patched the final render pass layouts here
    // ATM: we only patch the swapchain image if needed

    const GpuResourceState& prevState = ref.state;
    const BindableImage& prevImage = ref.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
    const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
                                                : GetSrcImageBarrier(prevState, prevImage);

    const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
    // NOTE: we are not interested in the general access flags here, only write access matters
    // (both prevStateRb.accessFlags and state.accessFlags are checked for write bits)
    const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (state.accessFlags & WRITE_ACCESS_FLAGS);
    const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
    // input attachments are handled with render passes and not with barriers
    if ((layoutChanged || writeTarget) && (!inputAttachment)) {
        if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
            (prevState.gpuQueue.type != state.gpuQueue.type)) {
            PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);

            PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
            currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
                res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
        } else {
            const ResourceBarrier dstImageBarrier =
                addMips ? GetDstImageBarrierMips(state, prevImage, res) : GetDstImageBarrier(state, res);
            params.combinedBarriers.push_back(
                CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
        }

        ref.state = state;
        ref.resource = res;
        ref.prevRc = params.rcWithType;
        ref.prevRenderNodeIndex = params.renderNodeIndex;
        if (addMips) {
            ModifyAdditionalImageState(res, ref.additionalState);
        }
    }
}

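// Updates the tracked state of a dynamically tracked buffer and emits a barrier when the previous
// or the new access contains a write.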
void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
    const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
        return;
    }

    // get the current state of the buffer
    auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
    const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
    // if previous or current state is write -> barrier
    if ((prevStateRb.accessFlags & WRITE_ACCESS_FLAGS) || (dstState.accessFlags & WRITE_ACCESS_FLAGS)) {
        params.combinedBarriers.push_back(CommandBarrier {
            res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
    }

    // update the cached state to match the situation after the barrier
    srcStateRef.state = dstState;
    srcStateRef.resource = res;
    srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

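// Adds a src->dst barrier for a buffer, or records a queue ownership transfer when the queue type
// changes, and stores the new state in the tracking cache.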
void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer, vector<CommandBarrier>& barriers,
    vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableBuffer srcBuffer = stateRef.resource;

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_IMAGE);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(
            RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
                ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
    } else {
        const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
        const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);

        barriers.push_back(CommandBarrier {
            newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newBuffer;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
}

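// Adds a src->dst barrier for an image, or records a queue ownership transfer when the queue type
// changes, taking per-mip additional state into account, and stores the new state in the cache.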
void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
    vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    // newGpuResourceState has queue transfer image layout in old optionalImageLayout

    auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableImage srcImage = stateRef.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
            stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
    } else {
        const ResourceBarrier srcBarrier =
            addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
                    : GetSrcImageBarrier(srcState, srcImage);
        const ResourceBarrier dstBarrier = addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage)
                                                   : GetDstImageBarrier(newGpuResourceState, newImage);

        barriers.push_back(CommandBarrier {
            newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newImage;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
    if (addMips) {
        ModifyAdditionalImageState(newImage, stateRef.additionalState);
    }
}

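// Returns the tracking entry for a dynamically tracked buffer, allocating or reusing a tracking
// slot on first use; falls back to defaultBufferState_ if the handle index is out of range.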
RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call this with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
    if (arrayIndex < gpuBufferDataIndices_.size()) {
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuBufferAvailableIndices_.empty()) {
                dataIdx = gpuBufferAvailableIndices_.back();
                gpuBufferAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
                gpuBufferTracking_.emplace_back();
            }
            gpuBufferDataIndices_[arrayIndex] = dataIdx;

            gpuBufferTracking_[dataIdx].resource.handle = handle;
            gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
        }
        return gpuBufferTracking_[dataIdx];
    }

    return defaultBufferState_;
}

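// Image counterpart of GetBufferResourceStateRef; also allocates the per-mip layout array for
// images that track additional (per-mip) state.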
RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call this with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
    if (arrayIndex < gpuImageDataIndices_.size()) {
        // NOTE: render pass attachments are always expected to be dynamic resources
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuImageAvailableIndices_.empty()) {
                dataIdx = gpuImageAvailableIndices_.back();
                gpuImageAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
                gpuImageTracking_.emplace_back();
            }
            gpuImageDataIndices_[arrayIndex] = dataIdx;

            gpuImageTracking_[dataIdx].resource.handle = handle;
            gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
            if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
                (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
                gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
            (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
            PLUGIN_LOG_ONCE_W("dynamic_state_mips_issue_" + to_string(handle.id),
                "RENDER_VALIDATION: Additional mip states missing (handle:%" PRIx64 ")", handle.id);
        }
#endif
        return gpuImageTracking_[dataIdx];
    }

    PLUGIN_LOG_ONCE_W("render_graph_image_state_issues", "RenderGraph: Image tracking issue with handle count");
    return defaultImageState_;
}
RENDER_END_NAMESPACE()