/*
 * Copyright (C) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "renderer.h"

#include <algorithm>
#include <chrono>
#include <functional>
#include <utility>

#include <base/containers/string.h>
#include <base/containers/string_view.h>
#include <base/containers/unordered_map.h>
#include <base/containers/vector.h>
#include <core/perf/intf_performance_data_manager.h>
#include <render/datastore/intf_render_data_store_manager.h>
#include <render/datastore/intf_render_data_store_pod.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/intf_render_context.h>
#include <render/intf_renderer.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_node.h>
#include <render/render_data_structures.h>

#include "perf/cpu_perf_scope.h"

#if (RENDER_DEV_ENABLED == 1)
#include <cinttypes>
#endif

#include "datastore/render_data_store_manager.h"
#include "device/device.h"
#include "device/gpu_resource_manager.h"
#include "device/gpu_resource_util.h"
#include "device/render_frame_sync.h"
#include "device/shader_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_node_context_manager.h"
#include "nodecontext/render_node_graph_manager.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "perf/cpu_timer.h"
#include "render_backend.h"
#include "render_graph.h"
#include "util/log.h"
#include "util/render_util.h"

using namespace BASE_NS;
using namespace CORE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
// Helper class for running std::function as a ThreadPool task.
class FunctionTask final : public IThreadPool::ITask {
public:
    static Ptr Create(std::function<void()> func)
    {
        return Ptr { new FunctionTask(func) };
    }

    explicit FunctionTask(std::function<void()> func) : func_(func) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    std::function<void()> func_;
};

#if (RENDER_PERF_ENABLED == 1)
struct NodeTimerData {
    CpuTimer timer;
    string_view debugName;
};
#endif

struct RenderNodeExecutionParameters {
    const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores;
#if (RENDER_PERF_ENABLED == 1)
    vector<NodeTimerData>& nodeTimers;
#endif
    ITaskQueue* queue;
    IRenderDataStoreManager& renderData;
    ShaderManager& shaderManager;
    RenderingConfiguration& renderConfig;
};

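// Flags all nodes of the given render node graphs for re-initialization when the shader
// manager reports reloaded shaders (the device is idled before the flags are reset).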
void ProcessShaderReload(Device& device, ShaderManager& shaderMgr, RenderNodeGraphManager& renderNodeGraphMgr,
    const array_view<const RenderHandle>& renderNodeGraphs)
{
    if (shaderMgr.HasReloadedShaders()) {
        device.WaitForIdle();
        // NOTE: would be better to force pso re-creation based on low-level handle, but cannot be done at the moment
        PLUGIN_LOG_I("RENDER_PERFORMANCE_WARNING: re-init render nodes because of reloaded shaders");
        for (const auto& ref : renderNodeGraphs) {
            RenderNodeGraphNodeStore* nodeStore = renderNodeGraphMgr.Get(ref);
            if (nodeStore) {
                nodeStore->initialized = false;
                for (auto& nodeContextRef : nodeStore->renderNodeContextData) {
                    nodeContextRef.initialized = false; // re-init all nodes
                }
            }
        }
    }
}

RenderHandleReference CreateBackBufferGpuBufferRenderNodeGraph(RenderNodeGraphManager& renderNodeGraphMgr)
{
    RenderNodeGraphDesc rngd;
    rngd.renderNodeGraphName = "CORE_RNG_BACKBUFFER_GPUBUFFER";
    RenderNodeDesc rnd;
    rnd.typeName = "CORE_RN_BACKBUFFER_GPUBUFFER";
    rnd.nodeName = "CORE_RN_BACKBUFFER_GPUBUFFER_I";
    rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
    rngd.nodes.emplace_back(move(rnd));

    return renderNodeGraphMgr.Create(IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
}

// Helper for Renderer::InitNodeGraph
unordered_map<string, uint32_t> InitializeRenderNodeContextData(IRenderContext& renderContext,
    RenderNodeGraphNodeStore& nodeStore, const bool enableMultiQueue, const RenderingConfiguration& renderConfig)
{
    unordered_map<string, uint32_t> renderNodeNameToIndex(nodeStore.renderNodeData.size());
    vector<ContextInitDescription> contextInitDescs(nodeStore.renderNodeData.size());
    for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
        const auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
        PLUGIN_ASSERT(renderNodeData.inputData);
        PLUGIN_ASSERT(renderNodeData.node);
        auto& inputData = *(renderNodeData.inputData);
        auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];

        renderNodeNameToIndex[renderNodeData.fullName] = (uint32_t)nodeIdx;

        // reset always, dependencies are redone with new nodes
        nodeContextData.submitInfo.signalSemaphore = false;
        nodeContextData.submitInfo.waitSemaphoreCount = 0;
        nodeContextData.submitInfo.waitForSwapchainAcquireSignal = false;

        // with dynamic render node graphs, single nodes can be initialized
        // set to true when doing the renderNode->InitNode();
        if (nodeContextData.initialized) {
            continue;
        }

        auto& contextInitRef = contextInitDescs[nodeIdx];
        contextInitRef.requestedQueue = inputData.queue;

        Device& device = (Device&)renderContext.GetDevice();
        contextInitRef.requestedQueue = device.GetValidGpuQueue(contextInitRef.requestedQueue);

        ShaderManager& shaderMgr = (ShaderManager&)renderContext.GetDevice().GetShaderManager();
        GpuResourceManager& gpuResourceMgr = (GpuResourceManager&)renderContext.GetDevice().GetGpuResourceManager();
        // ordering is important
        nodeContextData.nodeContextPsoMgr = make_unique<NodeContextPsoManager>(device, shaderMgr);
        nodeContextData.nodeContextDescriptorSetMgr = device.CreateNodeContextDescriptorSetManager();
        nodeContextData.renderCommandList = make_unique<RenderCommandList>(*nodeContextData.nodeContextDescriptorSetMgr,
            gpuResourceMgr, *nodeContextData.nodeContextPsoMgr, contextInitRef.requestedQueue, enableMultiQueue);
        nodeContextData.contextPoolMgr =
            device.CreateNodeContextPoolManager(gpuResourceMgr, contextInitRef.requestedQueue);
        RenderNodeGraphData rngd = { nodeStore.renderNodeGraphName, nodeStore.renderNodeGraphDataStoreName,
            renderConfig };
        RenderNodeContextManager::CreateInfo rncmci { renderContext, rngd, *renderNodeData.inputData,
            renderNodeData.nodeName, renderNodeData.nodeJson, *nodeStore.renderNodeGpuResourceMgr,
            *nodeContextData.nodeContextDescriptorSetMgr, *nodeContextData.nodeContextPsoMgr,
            *nodeContextData.renderCommandList, *nodeStore.renderNodeGraphShareDataMgr };
        nodeContextData.renderNodeContextManager = make_unique<RenderNodeContextManager>(rncmci);
#if ((RENDER_VALIDATION_ENABLED == 1) || (RENDER_VULKAN_VALIDATION_ENABLED == 1))
        nodeContextData.nodeContextDescriptorSetMgr->SetValidationDebugName(renderNodeData.fullName);
        nodeContextData.contextPoolMgr->SetValidationDebugName(renderNodeData.fullName);
#endif
        nodeContextData.renderBarrierList = make_unique<RenderBarrierList>(
            (contextInitRef.requestedQueue.type != GpuQueue::QueueType::UNDEFINED) ? 4u : 0u);
    }
    return renderNodeNameToIndex;
}

// Helper for Renderer::InitNodeGraph
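// Resolves GPU queue wait dependencies given as render node names: each producer node is
// marked to signal a semaphore and the waiting node stores the producer's index, up to
// PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS waits per node.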
void PatchSignaling(RenderNodeGraphNodeStore& nodeStore, const unordered_map<string, uint32_t>& renderNodeNameToIndex)
{
    PLUGIN_ASSERT(renderNodeNameToIndex.size() == nodeStore.renderNodeData.size());
    for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
        PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].inputData);
        const auto& nodeInputDataRef = *(nodeStore.renderNodeData[nodeIdx].inputData);
        auto& submitInfo = nodeStore.renderNodeContextData[nodeIdx].submitInfo;

        for (const auto& nodeNameRef : nodeInputDataRef.gpuQueueWaitForSignals.nodeNames) {
            if (const auto iter = renderNodeNameToIndex.find(nodeNameRef); iter != renderNodeNameToIndex.cend()) {
                if (submitInfo.waitSemaphoreCount < PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS) {
                    const uint32_t index = iter->second;
                    // mark node to signal
                    nodeStore.renderNodeContextData[index].submitInfo.signalSemaphore = true;

                    submitInfo.waitSemaphoreNodeIndices[submitInfo.waitSemaphoreCount] = index;
                    submitInfo.waitSemaphoreCount++;
                } else {
                    PLUGIN_LOG_E("render node can wait only for (%u) render node signals",
                        PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS);
                    PLUGIN_ASSERT(false);
                }
            } else {
                PLUGIN_LOG_E("invalid render node wait signal dependency");
                PLUGIN_ASSERT(false);
            }
        }
    }
}

// Helper for Renderer::RenderFrame
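// Advances per-frame state for every render node: command lists, barrier lists, context
// pools, descriptor set managers, and the render node context managers.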
void BeginRenderNodeGraph(const vector<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
    const RenderNodeContextManager::PerFrameTimings& timings)
{
    for (const RenderNodeGraphNodeStore* renderNodeDataStore : renderNodeGraphNodeStores) {
        const uint32_t renderNodeCount = static_cast<uint32_t>(renderNodeDataStore->renderNodeContextData.size());
        auto& rngShareData = renderNodeDataStore->renderNodeGraphShareData;
        renderNodeDataStore->renderNodeGraphShareDataMgr->BeginFrame(renderNodeCount,
            { rngShareData.inputs, rngShareData.inputCount }, { rngShareData.outputs, rngShareData.outputCount });
        for (uint32_t idx = 0; idx < renderNodeCount; ++idx) {
            const RenderNodeContextData& contextData = renderNodeDataStore->renderNodeContextData[idx];
            contextData.renderCommandList->BeginFrame();
            contextData.renderBarrierList->BeginFrame();
            contextData.contextPoolMgr->BeginFrame();
            contextData.nodeContextDescriptorSetMgr->BeginFrame();
            contextData.renderNodeContextManager->BeginFrame(idx, timings);
        }
    }
}

// Helper for Renderer::RenderFrame
inline vector<RenderNodeGraphNodeStore*> GetRenderNodeGraphNodeStores(
    array_view<const RenderHandle> inputs, RenderNodeGraphManager& renderNodeGraphMgr)
{
    vector<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores;
    renderNodeGraphNodeStores.reserve(inputs.size());
    for (auto const& input : inputs) {
        renderNodeGraphNodeStores.emplace_back(renderNodeGraphMgr.Get(input));
    }
    return renderNodeGraphNodeStores;
}

// Helper for Renderer::RenderFrame
inline bool WaitForFence(const Device& device, RenderFrameSync& renderFrameSync)
{
    RENDER_CPU_PERF_SCOPE("Renderer", "Renderer", "WaitForFrameFence_Cpu");
    renderFrameSync.WaitForFrameFence();

    return device.GetDeviceStatus();
}

// Helper for Renderer::RenderFrame
inline void ProcessRenderNodeGraph(
    const Device& device, RenderGraph& renderGraph, array_view<RenderNodeGraphNodeStore*> graphNodeStoreView)
{
    RENDER_CPU_PERF_SCOPE("Renderer", "Renderer", "RenderGraph_Cpu");
    const RenderHandle backbufferHandle = device.GetBackbufferHandle();
    renderGraph.ProcessRenderNodeGraph(backbufferHandle, graphNodeStoreView);
}

// Helper for Renderer::ExecuteRenderNodes
void CreateGpuResourcesWithRenderNodes(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
    IRenderDataStoreManager& renderData, ShaderManager& shaderMgr)
{
    for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
        PLUGIN_ASSERT(renderNodeGraphNodeStores[graphIdx]);

        RenderNodeGraphNodeStore const& nodeStore = *renderNodeGraphNodeStores[graphIdx];
        for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
            IRenderNode& renderNode = *(nodeStore.renderNodeData[nodeIdx].node);
            renderNode.PreExecuteFrame();
        }
    }
}

// Helper for Renderer::ExecuteRenderNodes
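// Submits one task per render node to the given task queue and waits for all of them to
// complete; with RENDER_PERF_ENABLED each node execution is additionally CPU-timed.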
void RenderNodeExecution(RenderNodeExecutionParameters& params)
{
#if (RENDER_PERF_ENABLED == 1)
    size_t allNodeIdx = 0;
#endif
    uint64_t taskId = 0;
    for (const auto* nodeStorePtr : params.renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(nodeStorePtr);
        const auto& nodeStore = *nodeStorePtr;

        for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
            PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].node);
            IRenderNode& renderNode = *(nodeStore.renderNodeData[nodeIdx].node);
            RenderNodeContextData const& renderNodeContextData = nodeStore.renderNodeContextData[nodeIdx];
            RenderCommandList& renderCommandList = *renderNodeContextData.renderCommandList;

#if (RENDER_PERF_ENABLED == 1)
            auto& timerRef = params.nodeTimers[allNodeIdx];
            timerRef.debugName = nodeStore.renderNodeData[nodeIdx].fullName;
            params.queue->Submit(taskId++, FunctionTask::Create([&timerRef, &renderNode, &renderCommandList]() {
                timerRef.timer.Begin();

                renderCommandList.BeforeRenderNodeExecuteFrame();
                renderNode.ExecuteFrame(renderCommandList);
                renderCommandList.AfterRenderNodeExecuteFrame();

                timerRef.timer.End();
            }));
            allNodeIdx++;
#else
            params.queue->Submit(taskId++, FunctionTask::Create([&renderCommandList, &renderNode]() {
                renderCommandList.BeforeRenderNodeExecuteFrame();
                renderNode.ExecuteFrame(renderCommandList);
                renderCommandList.AfterRenderNodeExecuteFrame();
            }));
#endif
        }
    }

    // Execute and wait for completion.
    params.queue->Execute();
}

// Helper for Renderer::ExecuteRenderBackend
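// Gathers render command contexts for all nodes that recorded valid render commands and,
// when multiple GPU queues are enabled, rewrites the submit wait indices so they refer to
// render command context indices instead of render node indices.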
void IterateRenderBackendNodeGraphNodeStores(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
    RenderCommandFrameData& rcfd, const bool& multiQueueEnabled)
{
    for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
        PLUGIN_ASSERT(renderNodeGraphNodeStores[graphIdx]);

        RenderNodeGraphNodeStore const& nodeStore = *renderNodeGraphNodeStores[graphIdx];

        unordered_map<uint32_t, uint32_t> nodeIdxToRenderCommandContextIdx;
        const uint32_t multiQueuePatchBeginIdx = (uint32_t)rcfd.renderCommandContexts.size();
        uint32_t multiQueuePatchCount = 0;
        if (multiQueueEnabled) {
            nodeIdxToRenderCommandContextIdx.reserve(nodeStore.renderNodeContextData.size());
        }

        for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeContextData.size(); ++nodeIdx) {
            const auto& ref = nodeStore.renderNodeContextData[nodeIdx];
            PLUGIN_ASSERT((ref.renderCommandList != nullptr) && (ref.renderBarrierList != nullptr) &&
                          (ref.nodeContextPsoMgr != nullptr) && (ref.contextPoolMgr != nullptr));
            const bool valid = ref.renderCommandList->HasValidRenderCommands();
            if (valid) {
                if (multiQueueEnabled) {
                    nodeIdxToRenderCommandContextIdx[(uint32_t)nodeIdx] = (uint32_t)rcfd.renderCommandContexts.size();
                    multiQueuePatchCount++;
                }

                rcfd.renderCommandContexts.push_back({ ref.renderBackendNode, ref.renderCommandList.get(),
                    ref.renderBarrierList.get(), ref.nodeContextPsoMgr.get(), ref.nodeContextDescriptorSetMgr.get(),
                    ref.contextPoolMgr.get(), ref.renderCommandList->HasMultiRenderCommandListSubpasses(),
                    ref.renderCommandList->GetMultiRenderCommandListSubpassCount(), (uint32_t)nodeIdx, ref.submitInfo,
                    nodeStore.renderNodeData[nodeIdx].fullName });
            }
        }

        if (multiQueueEnabled) { // patch correct render command context indices
            // patch only the contexts added for this render node graph
            for (uint32_t idx = multiQueuePatchBeginIdx; idx < (multiQueuePatchBeginIdx + multiQueuePatchCount); ++idx) {
                auto& ref = rcfd.renderCommandContexts[idx];
                const auto& nodeContextRef = nodeStore.renderNodeContextData[ref.renderGraphRenderNodeIndex];

                ref.submitDepencies.signalSemaphore = nodeContextRef.submitInfo.signalSemaphore;
                ref.submitDepencies.waitSemaphoreCount = nodeContextRef.submitInfo.waitSemaphoreCount;
                for (uint32_t waitIdx = 0; waitIdx < ref.submitDepencies.waitSemaphoreCount; ++waitIdx) {
                    const uint32_t currRenderNodeIdx = nodeContextRef.submitInfo.waitSemaphoreNodeIndices[waitIdx];
                    PLUGIN_ASSERT(nodeIdxToRenderCommandContextIdx.count(currRenderNodeIdx) == 1);

                    ref.submitDepencies.waitSemaphoreNodeIndices[waitIdx] =
                        nodeIdxToRenderCommandContextIdx[currRenderNodeIdx];
                }
            }
        }
    }
}

template<typename T>
inline bool IsNull(T* ptr)
{
    return ptr == nullptr;
}

inline int64_t GetTimeStampNow()
{
    using namespace std::chrono;
    using Clock = system_clock;
    return Clock::now().time_since_epoch().count();
}
} // namespace

Renderer::Renderer(IRenderContext& context)
    : renderContext_(context), device_(static_cast<Device&>(context.GetDevice())),
      gpuResourceMgr_(static_cast<GpuResourceManager&>(device_.GetGpuResourceManager())),
      shaderMgr_(static_cast<ShaderManager&>(device_.GetShaderManager())),
      renderNodeGraphMgr_(static_cast<RenderNodeGraphManager&>(context.GetRenderNodeGraphManager())),
      renderDataStoreMgr_(static_cast<RenderDataStoreManager&>(context.GetRenderDataStoreManager())),
      renderUtil_(static_cast<RenderUtil&>(context.GetRenderUtil()))
{
    const auto factory = GetInstance<ITaskQueueFactory>(UID_TASK_QUEUE_FACTORY);
    threadPool_ = factory->CreateThreadPool(factory->GetNumberOfCores());
    parallelQueue_ = factory->CreateParallelTaskQueue(threadPool_);
    sequentialQueue_ = factory->CreateSequentialTaskQueue(threadPool_);

    renderConfig_ = { device_.GetBackendType(), RenderingConfiguration::NdcOrigin::TOP_LEFT };
#if ((RENDER_HAS_GL_BACKEND) || (RENDER_HAS_GLES_BACKEND)) && (RENDER_GL_FLIP_Y_SWAPCHAIN == 0)
    // The flag is for informative purposes only.
    if ((renderConfig_.renderBackend == DeviceBackendType::OPENGL) ||
        (renderConfig_.renderBackend == DeviceBackendType::OPENGLES)) {
        renderConfig_.ndcOrigin = RenderingConfiguration::NdcOrigin::BOTTOM_LEFT;
    }
#endif

    renderGraph_ = make_unique<RenderGraph>(gpuResourceMgr_);
    renderBackend_ = device_.CreateRenderBackend(gpuResourceMgr_, parallelQueue_);
    renderFrameSync_ = device_.CreateRenderFrameSync();

    { // default render node graph for staging
        RenderNodeGraphDesc rngd;
        {
            RenderNodeDesc rnd;
            rnd.typeName = "CORE_RN_STAGING";
            rnd.nodeName = "CORE_RN_STAGING_I";
            rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
            rngd.nodes.emplace_back(move(rnd));
        }
#if (RENDER_VULKAN_RT_ENABLED == 1)
        if (device_.GetBackendType() == DeviceBackendType::VULKAN) {
            RenderNodeDesc rnd;
            rnd.typeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING";
            rnd.nodeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING_I";
            rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
            rngd.nodes.emplace_back(move(rnd));
        }
#endif
        defaultStagingRng_ = renderNodeGraphMgr_.Create(
            IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
    }
}

Renderer::~Renderer() {}

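// One-time initialization of a render node graph: creates per-node context data, patches
// multi-queue signaling, and calls InitNode() for render nodes that are not yet initialized.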
void Renderer::InitNodeGraph(RenderHandle renderNodeGraphHandle)
{
    auto renderNodeDataStore = renderNodeGraphMgr_.Get(renderNodeGraphHandle);
    if (!renderNodeDataStore) {
        return;
    }

    RenderNodeGraphNodeStore& nodeStore = *renderNodeDataStore;
    if (nodeStore.initialized) {
        return;
    }
    nodeStore.initialized = true;

    // create render node graph specific managers if not created yet
    if (!nodeStore.renderNodeGpuResourceMgr) {
        nodeStore.renderNodeGpuResourceMgr = make_unique<RenderNodeGpuResourceManager>(gpuResourceMgr_);
    }

    const bool enableMultiQueue = (device_.GetGpuQueueCount() > 1);

    // serial, initialize render node context data
    auto renderNodeNameToIndex =
        InitializeRenderNodeContextData(renderContext_, nodeStore, enableMultiQueue, renderConfig_);

    if (enableMultiQueue) {
        // patch gpu queue signaling
        PatchSignaling(nodeStore, renderNodeNameToIndex);
    }

    // NOTE: needs to be called once before init. every frame called in BeginRenderNodeGraph()
    nodeStore.renderNodeGraphShareDataMgr->BeginFrame(static_cast<uint32_t>(nodeStore.renderNodeData.size()),
        { nodeStore.renderNodeGraphShareData.inputs, nodeStore.renderNodeGraphShareData.inputCount },
        { nodeStore.renderNodeGraphShareData.outputs, nodeStore.renderNodeGraphShareData.outputCount });

    for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
        auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
        PLUGIN_ASSERT(renderNodeData.node);
        IRenderNode& renderNode = *(renderNodeData.node);
        auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];

        if (nodeContextData.initialized) {
            continue;
        }
        nodeContextData.initialized = true;
        // NOTE: needs to be called once before init. every frame called in BeginRenderNodeGraph()
        const RenderNodeContextManager::PerFrameTimings timings { 0, 0, device_.GetFrameCount() };
        nodeContextData.renderNodeContextManager->BeginFrame(static_cast<uint32_t>(nodeIdx), timings);

        RENDER_CPU_PERF_SCOPE("Renderer", "Renderer_InitNode_Cpu", renderNodeData.fullName);

        PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].inputData);
        renderNode.InitNode(*(nodeContextData.renderNodeContextManager));
    }
}

// Helper for Renderer::RenderFrame
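// Remaps the configured back buffer image either to the default swapchain image or to a
// user-provided GPU image, based on the NodeGraphBackBufferConfiguration POD.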
void Renderer::RemapBackBufferHandle(const IRenderDataStoreManager& renderData)
{
    const auto* dataStorePod = static_cast<IRenderDataStorePod*>(renderData.GetRenderDataStore("RenderDataStorePod"));
    if (dataStorePod) {
        auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
        const auto bb = reinterpret_cast<const NodeGraphBackBufferConfiguration*>(dataView.data());
        if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) {
            PLUGIN_ASSERT(device_.HasSwapchain());
            const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
            if (!RenderHandleUtil::IsValid(handle)) {
                const RenderHandle backBufferHandle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
                const RenderHandle firstSwapchain = gpuResourceMgr_.GetImageRawHandle("CORE_DEFAULT_SWAPCHAIN_0");
                gpuResourceMgr_.RemapGpuImageHandle(backBufferHandle, firstSwapchain);
            }
        } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE) {
            const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
            if (RenderHandleUtil::IsValid(handle) && (bb->backBufferHandle)) {
                gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle.GetHandle());
            }
        } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE_BUFFER_COPY) {
            const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
            if (RenderHandleUtil::IsValid(handle) && (bb->backBufferHandle) && (bb->gpuBufferHandle)) {
                gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle.GetHandle());
            }
        }
    }
}

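// Frame flow: init render node graphs -> begin per-frame data -> wait for the frame fence ->
// execute render nodes -> process the render graph -> execute the render backend.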
void Renderer::RenderFrameImpl(const array_view<const RenderHandle> renderNodeGraphs)
{
    Tick();
    frameTimes_.begin = GetTimeStampNow();
    RENDER_CPU_PERF_SCOPE("Renderer", "Frame", "RenderFrame");

    if (device_.GetDeviceStatus() == false) {
        ProcessTimeStampEnd();
        PLUGIN_LOG_ONCE_E("invalid_device_status_render_frame", "invalid device for rendering");
        return;
    }
    device_.Activate();
    device_.FrameStart();

    renderNodeGraphMgr_.HandlePendingAllocations();
    renderDataStoreMgr_.PreRender();

    ProcessShaderReload(device_, shaderMgr_, renderNodeGraphMgr_, renderNodeGraphs);
    // create new shaders if any created this frame (needs to be called before render node init)
    shaderMgr_.HandlePendingAllocations();

    // update render node graphs with default staging and possible dev gui render node graphs
    const auto renderNodeGraphInputVector = GatherInputs(renderNodeGraphs);

    const auto renderNodeGraphInputs = array_view(renderNodeGraphInputVector.data(), renderNodeGraphInputVector.size());

    for (const auto& ref : renderNodeGraphInputs) {
        InitNodeGraph(ref);
    }
    device_.Deactivate();

    renderGraph_->BeginFrame();
    renderFrameSync_->BeginFrame();

    auto graphNodeStores = GetRenderNodeGraphNodeStores(renderNodeGraphInputs, renderNodeGraphMgr_);
    if (std::any_of(graphNodeStores.begin(), graphNodeStores.end(), IsNull<RenderNodeGraphNodeStore>)) {
        ProcessTimeStampEnd();
        PLUGIN_LOG_W("invalid render node graphs for rendering");
        return;
    }

    // NOTE: by node graph name find data
    // NOTE: deprecate this
    RemapBackBufferHandle(renderDataStoreMgr_);

    // NodeContextPoolManagerGLES::BeginFrame may delete FBOs and device must be active.
    device_.Activate();

    // begin frame (advance ring buffers etc.)
    const RenderNodeContextManager::PerFrameTimings timings { previousFrameTime_ - firstTime_, deltaTime_,
        device_.GetFrameCount() };
    BeginRenderNodeGraph(graphNodeStores, timings);

    // synchronize, needed for persistently mapped gpu buffer writing
    if (!WaitForFence(device_, *renderFrameSync_)) {
        device_.Deactivate();
        return; // possible lost device with frame fence
    }

    // gpu resource allocation and deallocation
    gpuResourceMgr_.HandlePendingAllocations();

    device_.Deactivate();

    const auto nodeStoresView = array_view<RenderNodeGraphNodeStore*>(graphNodeStores);

    ExecuteRenderNodes(renderNodeGraphInputs, nodeStoresView);

    // render graph process for all render nodes of all render graphs
    ProcessRenderNodeGraph(device_, *renderGraph_, nodeStoresView);

    device_.SetLockResourceBackendAccess(true);
    renderDataStoreMgr_.PreRenderBackend();

    ExecuteRenderBackend(renderNodeGraphInputs, nodeStoresView);

    device_.SetLockResourceBackendAccess(false);
    renderDataStoreMgr_.PostRender();

    device_.FrameEnd();
    ProcessTimeStampEnd();
}

void Renderer::RenderFrame(const array_view<const RenderHandleReference> renderNodeGraphs)
{
    const auto lock = std::lock_guard(renderMutex_);

    // add only unique and valid handles to list for rendering
    vector<RenderHandle> rngs;
    rngs.reserve(renderNodeGraphs.size());
    for (size_t iIdx = 0; iIdx < renderNodeGraphs.size(); ++iIdx) {
        const RenderHandle& handle = renderNodeGraphs[iIdx].GetHandle();
        bool duplicate = false;
        for (auto& ref : rngs) {
            if (ref == handle) {
                duplicate = true;
            }
        }
        if ((RenderHandleUtil::GetHandleType(handle) == RenderHandleType::RENDER_NODE_GRAPH) && (!duplicate)) {
            rngs.emplace_back(handle);
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        if (duplicate) {
            PLUGIN_LOG_ONCE_E("renderer_rf_duplicate_rng",
                "RENDER_VALIDATION: duplicate render node graphs are not supported (idx: %u, id: %" PRIx64 ")",
                static_cast<uint32_t>(iIdx), handle.id);
        }
#endif
    }
    RenderFrameImpl(rngs);
}

void Renderer::RenderDeferred(const array_view<const RenderHandleReference> renderNodeGraphs)
{
    const auto lock = std::lock_guard(deferredMutex_);
    for (const auto& ref : renderNodeGraphs) {
        deferredRenderNodeGraphs_.emplace_back(ref);
    }
}

void Renderer::RenderDeferredFrame()
{
    deferredMutex_.lock();
    decltype(deferredRenderNodeGraphs_) renderNodeGraphs = move(deferredRenderNodeGraphs_);
    deferredMutex_.unlock();
    RenderFrame(renderNodeGraphs);
}

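// Render node execution: GPU resources are created and pending allocations handled up front,
// after which the nodes only record render command lists (in parallel when allowed).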
void Renderer::ExecuteRenderNodes(const array_view<const RenderHandle> renderNodeGraphInputs,
    const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
#if (RENDER_PERF_ENABLED == 1)
    RENDER_CPU_PERF_BEGIN(fullExecuteCpuTimer, "Renderer", "Renderer", "ExecuteAllNodes_Cpu");

    size_t allRenderNodeCount = 0;
    for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
        allRenderNodeCount += renderNodeGraphNodeStores[graphIdx]->renderNodeData.size();
    }

    vector<NodeTimerData> nodeTimers(allRenderNodeCount);
#endif

    ITaskQueue* queue = nullptr;
    if (device_.AllowThreadedProcessing()) {
        queue = parallelQueue_.get();
    } else {
        queue = sequentialQueue_.get();
    }

    // single threaded gpu resource creation with render nodes
    CreateGpuResourcesWithRenderNodes(renderNodeGraphNodeStores, renderDataStoreMgr_, shaderMgr_);

    // lock staging data for this frame
    gpuResourceMgr_.LockFrameStagingData();
    // final gpu resource allocation and deallocation before low level engine resource handle lock-up
    // we do not allocate or deallocate resources after RenderNode::ExecuteFrame()
    device_.Activate();
    gpuResourceMgr_.HandlePendingAllocations();
    device_.Deactivate();

    // process render node graph render node share preparations
    for (auto& ref : renderNodeGraphNodeStores) {
        ref->renderNodeGraphShareDataMgr->PrepareExecuteFrame();
    }

    RenderNodeExecutionParameters params = {
        renderNodeGraphNodeStores,
#if (RENDER_PERF_ENABLED == 1)
        nodeTimers,
#endif
        queue,
        renderDataStoreMgr_,
        shaderMgr_,
        renderConfig_
    };

    // multi-threaded render node execution
    RenderNodeExecution(params);

    // Remove tasks.
    queue->Clear();

#if (RENDER_PERF_ENABLED == 1)
    RENDER_CPU_PERF_END(fullExecuteCpuTimer);

    if (auto* inst = GetInstance<IPerformanceDataManagerFactory>(UID_PERFORMANCE_FACTORY); inst) {
        if (IPerformanceDataManager* perfData = inst->Get("RenderNode"); perfData) {
            for (size_t nodeIdx = 0; nodeIdx < nodeTimers.size(); ++nodeIdx) {
                const auto& timerRef = nodeTimers[nodeIdx];
                perfData->UpdateData(timerRef.debugName, "RenderNodeExecute_Cpu", timerRef.timer.GetMicroseconds());
            }
        }
    }
#endif
}

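// Backend execution: collects render command contexts, then renders and presents through the
// render backend; skipped entirely when no context has valid render commands.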
void Renderer::ExecuteRenderBackend(const array_view<const RenderHandle> renderNodeGraphInputs,
    const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
    size_t allRenderNodeCount = 0;
    for (const auto nodeStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(nodeStore);
        allRenderNodeCount += nodeStore->renderNodeData.size();
    }

    RenderCommandFrameData rcfd;
    PLUGIN_ASSERT(renderFrameSync_);
    rcfd.renderFrameSync = renderFrameSync_.get();
    rcfd.renderCommandContexts.reserve(allRenderNodeCount);

    const bool multiQueueEnabled = (device_.GetGpuQueueCount() > 1u);

    IterateRenderBackendNodeGraphNodeStores(renderNodeGraphNodeStores, rcfd, multiQueueEnabled);

    // NOTE: by node graph name
    // NOTE: deprecate this
    const RenderGraph::BackbufferState bbState = renderGraph_->GetBackbufferResourceState();
    RenderBackendBackBufferConfiguration config { bbState.state, bbState.layout, {} };
    {
        auto const dataStorePod =
            static_cast<IRenderDataStorePod const*>(renderDataStoreMgr_.GetRenderDataStore("RenderDataStorePod"));
        if (dataStorePod) {
            auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
            const NodeGraphBackBufferConfiguration* bb = (const NodeGraphBackBufferConfiguration*)dataView.data();
            config.config = *bb;
        }
    }

    if (!rcfd.renderCommandContexts.empty()) { // do not execute backend with zero work
        device_.SetRenderBackendRunning(true);
        frameTimes_.beginBackend = GetTimeStampNow();
        device_.Activate();
        renderBackend_->Render(rcfd, config);
        frameTimes_.beginBackendPresent = GetTimeStampNow();
        renderBackend_->Present(config);
        device_.Deactivate();
        frameTimes_.endBackend = GetTimeStampNow();
        device_.SetRenderBackendRunning(false);
    }

    device_.Activate();
    gpuResourceMgr_.EndFrame();
    device_.Deactivate();
}

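// Builds the final render node graph list: the default staging graph first, then the caller's
// graphs, and the back buffer GPU buffer copy graph when the configuration requires it.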
vector<RenderHandle> Renderer::GatherInputs(const array_view<const RenderHandle> renderNodeGraphInputList)
{
    vector<RenderHandle> renderNodeGraphInputsVector;
    size_t defaultRenderNodeGraphCount = 1;
#if (RENDER_DEV_ENABLED == 1)
    defaultRenderNodeGraphCount += 1;
#endif
    renderNodeGraphInputsVector.reserve(renderNodeGraphInputList.size() + defaultRenderNodeGraphCount);
    renderNodeGraphInputsVector.emplace_back(defaultStagingRng_.GetHandle());
    renderNodeGraphInputsVector.insert(renderNodeGraphInputsVector.end(), renderNodeGraphInputList.begin().ptr(),
        renderNodeGraphInputList.end().ptr());
    if (const auto* dataStorePod =
            static_cast<IRenderDataStorePod*>(renderDataStoreMgr_.GetRenderDataStore("RenderDataStorePod"));
        dataStorePod) {
        auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
        const auto bb = reinterpret_cast<const NodeGraphBackBufferConfiguration*>(dataView.data());
        if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE_BUFFER_COPY) {
            if (!defaultBackBufferGpuBufferRng_) {
                defaultBackBufferGpuBufferRng_ = CreateBackBufferGpuBufferRenderNodeGraph(renderNodeGraphMgr_);
                // we have passed render node graph pending allocations, re-allocate
                renderNodeGraphMgr_.HandlePendingAllocations();
            }
            renderNodeGraphInputsVector.emplace_back(defaultBackBufferGpuBufferRng_.GetHandle());
        }
    }
    return renderNodeGraphInputsVector;
}

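// Clamps the frame timestamps so they are monotonically non-decreasing (backend phases may
// not have run this frame) before handing them to the render util.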
void Renderer::ProcessTimeStampEnd()
{
    frameTimes_.end = GetTimeStampNow();

    int64_t finalTime = frameTimes_.begin;
    finalTime = Math::max(finalTime, frameTimes_.beginBackend);
    frameTimes_.beginBackend = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.beginBackendPresent);
    frameTimes_.beginBackendPresent = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.endBackend);
    frameTimes_.endBackend = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.end);
    frameTimes_.end = finalTime;

    PLUGIN_ASSERT(frameTimes_.end >= frameTimes_.endBackend);
    PLUGIN_ASSERT(frameTimes_.endBackend >= frameTimes_.beginBackendPresent);
    PLUGIN_ASSERT(frameTimes_.beginBackendPresent >= frameTimes_.beginBackend);
    PLUGIN_ASSERT(frameTimes_.beginBackend >= frameTimes_.begin);

    renderUtil_.SetRenderTimings(frameTimes_);
    frameTimes_ = {};
}

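// Updates the frame delta time from the high resolution clock; long stalls are clamped to ~1/15 s.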
void Renderer::Tick()
{
    using namespace std::chrono;
    const auto currentTime =
        static_cast<uint64_t>(duration_cast<microseconds>(high_resolution_clock::now().time_since_epoch()).count());

    if (firstTime_ == ~0u) {
        previousFrameTime_ = firstTime_ = currentTime;
    }
    deltaTime_ = currentTime - previousFrameTime_;
    constexpr auto limitHz = duration_cast<microseconds>(duration<float, std::ratio<1, 15u>>(1)).count();
    if (deltaTime_ > limitHz) {
        deltaTime_ = limitHz; // clamp the time step to at most 1/15 s (15 Hz)
    }
    previousFrameTime_ = currentTime;
}
RENDER_END_NAMESPACE()