1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "renderer.h"
17 
18 #include <algorithm>
19 #include <chrono>
20 
21 #include <base/containers/string.h>
22 #include <base/containers/string_view.h>
23 #include <base/containers/unordered_map.h>
24 #include <base/containers/vector.h>
25 #include <base/math/mathf.h>
26 #if (RENDER_PERF_ENABLED == 1)
27 #include <core/perf/intf_performance_data_manager.h>
28 #endif
29 #include <render/datastore/intf_render_data_store_default_staging.h>
30 #include <render/datastore/intf_render_data_store_manager.h>
31 #include <render/datastore/render_data_store_render_pods.h>
32 #include <render/intf_render_context.h>
33 #include <render/intf_renderer.h>
34 #include <render/namespace.h>
35 #include <render/nodecontext/intf_render_node.h>
36 #include <render/render_data_structures.h>
37 
38 #include "perf/cpu_perf_scope.h"
39 
40 #if (RENDER_VALIDATION_ENABLED == 1)
41 #include <cinttypes>
42 #endif
43 
44 #include "datastore/render_data_store_manager.h"
45 #include "datastore/render_data_store_pod.h"
46 #include "default_engine_constants.h"
47 #include "device/device.h"
48 #include "device/gpu_resource_cache.h"
49 #include "device/gpu_resource_manager.h"
50 #include "device/gpu_resource_util.h"
51 #include "device/render_frame_sync.h"
52 #include "device/shader_manager.h"
53 #include "nodecontext/node_context_pso_manager.h"
54 #include "nodecontext/render_node_context_manager.h"
55 #include "nodecontext/render_node_graph_manager.h"
56 #include "nodecontext/render_node_graph_node_store.h"
57 #include "perf/cpu_timer.h"
58 #include "render_backend.h"
59 #include "render_context.h"
60 #include "render_graph.h"
61 #include "util/log.h"
62 #include "util/render_util.h"
63 
64 using namespace BASE_NS;
65 using namespace CORE_NS;
66 
67 RENDER_BEGIN_NAMESPACE()
68 namespace {
69 CORE_PROFILER_SYMBOL(FRAME_MARKER, "Render");
70 
71 const string_view RENDER_DATA_STORE_DEFAULT_STAGING { "RenderDataStoreDefaultStaging" };
72 
73 // Helper class for running lambda as a ThreadPool task.
74 template<typename Fn>
75 class FunctionTask final : public IThreadPool::ITask {
76 public:
77     explicit FunctionTask(Fn&& func) : func_(BASE_NS::move(func)) {};
78 
79     void operator()() override
80     {
81         func_();
82     }
83 
84 protected:
85     void Destroy() override
86     {
87         delete this;
88     }
89 
90 private:
91     Fn func_;
92 };
93 
94 template<typename Fn>
95 inline IThreadPool::ITask::Ptr CreateFunctionTask(Fn&& func)
96 {
97     return IThreadPool::ITask::Ptr { new FunctionTask<Fn>(BASE_NS::move(func)) };
98 }
99 
100 #if (RENDER_PERF_ENABLED == 1)
101 struct NodeTimerData {
102     CpuTimer timer;
103     string_view debugName;
104 };
105 #endif
106 
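// Everything a single frame's render node execution needs; assembled in Renderer::ExecuteRenderNodes.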
107 struct RenderNodeExecutionParameters {
108     const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores;
109 #if (RENDER_PERF_ENABLED == 1)
110     vector<NodeTimerData>& nodeTimers;
111 #endif
112     ITaskQueue* queue;
113     IRenderDataStoreManager& renderData;
114     ShaderManager& shaderManager;
115 };
116 
117 inline uint32_t GetThreadPoolThreadCount(
118     const uint32_t numberOfHwCores, const RenderCreateInfo::ThreadPoolCreateInfo& tpci)
119 {
120     auto threads = static_cast<uint32_t>(static_cast<float>(numberOfHwCores) * tpci.threadCountCoefficient);
121     threads = Math::min(threads, tpci.maxCount);
122     threads = Math::max(threads, tpci.minCount);
123     threads = Math::max(1U, threads); // 1 is minimum
124     PLUGIN_LOG_D("Renderer thread pool thread count: %u", threads);
125     return threads;
126 }
127 
128 // Helper for Renderer::InitNodeGraph
129 unordered_map<string, uint32_t> InitializeRenderNodeContextData(IRenderContext& renderContext,
130     RenderNodeGraphNodeStore& nodeStore, const bool enableMultiQueue, const RenderingConfiguration& renderConfig)
131 {
132     unordered_map<string, uint32_t> renderNodeNameToIndex(nodeStore.renderNodeData.size());
133     vector<ContextInitDescription> contextInitDescs(nodeStore.renderNodeData.size());
134     for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
135         const auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
136         PLUGIN_ASSERT(renderNodeData.inputData);
137         PLUGIN_ASSERT(renderNodeData.node);
138         auto& inputData = *(renderNodeData.inputData);
139         auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];
140 
141         renderNodeNameToIndex[renderNodeData.fullName] = (uint32_t)nodeIdx;
142 
143         // reset always, dependencies are redone with new nodes
144         nodeContextData.submitInfo.signalSemaphore = false;
145         nodeContextData.submitInfo.waitSemaphoreCount = 0;
146         nodeContextData.submitInfo.waitForSwapchainAcquireSignal = false;
147 
148         // with dynamic render node graphs, individual nodes may already be initialized;
149         // the flag is set to true when renderNode->InitNode() is called
150         if (nodeContextData.initialized) {
151             continue;
152         }
153 
154         auto& contextInitRef = contextInitDescs[nodeIdx];
155         contextInitRef.requestedQueue = inputData.queue;
156 
157         auto& device = (Device&)renderContext.GetDevice();
158         contextInitRef.requestedQueue = device.GetValidGpuQueue(contextInitRef.requestedQueue);
159 
160         auto& shaderMgr = (ShaderManager&)renderContext.GetDevice().GetShaderManager();
161         auto& gpuResourceMgr = (GpuResourceManager&)renderContext.GetDevice().GetGpuResourceManager();
162         // ordering is important
163         nodeContextData.nodeContextPsoMgr = make_unique<NodeContextPsoManager>(device, shaderMgr);
164         nodeContextData.nodeContextDescriptorSetMgr = device.CreateNodeContextDescriptorSetManager();
165         nodeContextData.renderCommandList =
166             make_unique<RenderCommandList>(renderNodeData.fullName, *nodeContextData.nodeContextDescriptorSetMgr,
167                 gpuResourceMgr, *nodeContextData.nodeContextPsoMgr, contextInitRef.requestedQueue, enableMultiQueue);
168         nodeContextData.nodeContextPoolMgr =
169             device.CreateNodeContextPoolManager(gpuResourceMgr, contextInitRef.requestedQueue);
170         RenderNodeGraphData rngd = { nodeStore.renderNodeGraphName, nodeStore.renderNodeGraphDataStoreName,
171             renderConfig };
172         RenderNodeContextManager::CreateInfo rncmci { renderContext, rngd, *renderNodeData.inputData,
173             renderNodeData.nodeName, renderNodeData.nodeJson, *nodeContextData.nodeContextDescriptorSetMgr,
174             *nodeContextData.nodeContextPsoMgr, *nodeContextData.renderCommandList,
175             *nodeStore.renderNodeGraphShareDataMgr };
176         nodeContextData.renderNodeContextManager = make_unique<RenderNodeContextManager>(rncmci);
177 #if ((RENDER_VALIDATION_ENABLED == 1) || (RENDER_VULKAN_VALIDATION_ENABLED == 1))
178         nodeContextData.nodeContextDescriptorSetMgr->SetValidationDebugName(renderNodeData.fullName);
179         nodeContextData.nodeContextPoolMgr->SetValidationDebugName(renderNodeData.fullName);
180 #endif
181         nodeContextData.renderBarrierList = make_unique<RenderBarrierList>(
182             (contextInitRef.requestedQueue.type != GpuQueue::QueueType::UNDEFINED) ? 4u : 0u);
183     }
184     return renderNodeNameToIndex;
185 }
186 
187 // Helper for Renderer::InitNodeGraph
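// Resolves GPU queue wait dependencies, given as render node names, into node indices and marks the signaling nodes.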
188 void PatchSignaling(RenderNodeGraphNodeStore& nodeStore, const unordered_map<string, uint32_t>& renderNodeNameToIndex)
189 {
190     PLUGIN_ASSERT(renderNodeNameToIndex.size() == nodeStore.renderNodeData.size());
191     for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
192         PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].inputData);
193         const auto& nodeInputDataRef = *(nodeStore.renderNodeData[nodeIdx].inputData);
194         auto& submitInfo = nodeStore.renderNodeContextData[nodeIdx].submitInfo;
195 
196         for (const auto& nodeNameRef : nodeInputDataRef.gpuQueueWaitForSignals.nodeNames) {
197             if (const auto iter = renderNodeNameToIndex.find(nodeNameRef); iter != renderNodeNameToIndex.cend()) {
198                 if (submitInfo.waitSemaphoreCount < PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS) {
199                     const uint32_t index = iter->second;
200                     // mark node to signal
201                     nodeStore.renderNodeContextData[index].submitInfo.signalSemaphore = true;
202 
203                     submitInfo.waitSemaphoreNodeIndices[submitInfo.waitSemaphoreCount] = index;
204                     submitInfo.waitSemaphoreCount++;
205                 } else {
206                     PLUGIN_LOG_E("render node can wait only for (%u) render node signals",
207                         PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS);
208                     PLUGIN_ASSERT(false);
209                 }
210             } else {
211                 PLUGIN_LOG_E("invalid render node wait signal dependency");
212                 PLUGIN_ASSERT(false);
213             }
214         }
215     }
216 }
217 
218 // Helper for Renderer::RenderFrame
219 void BeginRenderNodeGraph(RenderNodeGraphGlobalShareDataManager* rngGlobalShareDataMgr,
220     const vector<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
221     const RenderNodeContextManager::PerFrameTimings& timings)
222 {
223     RenderNodeGraphShareDataManager* prevRngShareDataMgr = nullptr;
224     if (rngGlobalShareDataMgr) {
225         rngGlobalShareDataMgr->BeginFrame();
226     }
227     for (const RenderNodeGraphNodeStore* renderNodeDataStore : renderNodeGraphNodeStores) {
228         const auto renderNodeCount = static_cast<uint32_t>(renderNodeDataStore->renderNodeContextData.size());
229         auto& rngShareData = renderNodeDataStore->renderNodeGraphShareData;
230         renderNodeDataStore->renderNodeGraphShareDataMgr->BeginFrame(rngGlobalShareDataMgr, prevRngShareDataMgr,
231             renderNodeCount, { rngShareData.inputs, rngShareData.inputCount },
232             { rngShareData.outputs, rngShareData.outputCount });
233         for (uint32_t idx = 0; idx < renderNodeCount; ++idx) {
234             const RenderNodeContextData& contextData = renderNodeDataStore->renderNodeContextData[idx];
235             contextData.renderCommandList->BeginFrame();
236             contextData.renderBarrierList->BeginFrame();
237             contextData.nodeContextPoolMgr->BeginFrame();
238             contextData.nodeContextDescriptorSetMgr->BeginFrame();
239             contextData.renderNodeContextManager->BeginFrame(idx, timings);
240         }
241         prevRngShareDataMgr = renderNodeDataStore->renderNodeGraphShareDataMgr.get();
242     }
243 }
244 
245 // Helper for Renderer::RenderFrame
246 inline void FillRngNodeStores(array_view<const RenderHandle> inputs, RenderNodeGraphManager& renderNodeGraphMgr,
247     vector<RenderNodeGraphNodeStore*>& rngNodeStores)
248 {
249     rngNodeStores.reserve(inputs.size());
250     for (auto const& input : inputs) {
251         rngNodeStores.push_back(renderNodeGraphMgr.Get(input));
252     }
253 }
254 
255 // Helper for Renderer::RenderFrame
256 inline bool WaitForFence(const Device& device, RenderFrameSync& renderFrameSync)
257 {
258     RENDER_CPU_PERF_SCOPE("RenderFrame", "WaitForFrameFence");
259     renderFrameSync.WaitForFrameFence();
260 
261     return device.GetDeviceStatus();
262 }
263 
264 // Helper for Renderer::RenderFrame
265 inline void ProcessRenderNodeGraph(
266     Device& device, RenderGraph& renderGraph, array_view<RenderNodeGraphNodeStore*> graphNodeStoreView)
267 {
268     RENDER_CPU_PERF_SCOPE("RenderFrame", "RenderGraph");
269     renderGraph.ProcessRenderNodeGraph(device.HasSwapchain(), graphNodeStoreView);
270 }
271 
272 // Helper for Renderer::ExecuteRenderNodes
273 void RenderNodePreExecution(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores)
274 {
275     for (const RenderNodeGraphNodeStore* nodeStore : renderNodeGraphNodeStores) {
276         PLUGIN_ASSERT(nodeStore);
277         for (const auto& nodeIdx : nodeStore->renderNodeData) {
278             IRenderNode& renderNode = *(nodeIdx.node);
279             renderNode.PreExecuteFrame();
280         }
281     }
282 }
283 
284 // Helper for Renderer::ExecuteRenderNodes
285 void RenderNodeExecution(RenderNodeExecutionParameters& params)
286 {
287 #if (RENDER_PERF_ENABLED == 1)
288     size_t allNodeIdx = 0;
289 #endif
290     uint64_t taskId = 0;
291     for (const auto* nodeStorePtr : params.renderNodeGraphNodeStores) {
292         // there shouldn't be nullptrs but let's play it safe
293         PLUGIN_ASSERT(nodeStorePtr);
294         if (nodeStorePtr) {
295             const auto& nodeStore = *nodeStorePtr;
296             for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
297                 PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].node);
298                 if (nodeStore.renderNodeData[nodeIdx].node) {
299                     IRenderNode& renderNode = *(nodeStore.renderNodeData[nodeIdx].node);
300                     RenderNodeContextData const& renderNodeContextData = nodeStore.renderNodeContextData[nodeIdx];
301                     PLUGIN_ASSERT(renderNodeContextData.renderCommandList);
302                     RenderCommandList& renderCommandList = *renderNodeContextData.renderCommandList;
303 
304                     // Do not run render node if the flag is set
305                     if ((renderNode.GetExecuteFlags() &
306                             IRenderNode::ExecuteFlagBits::EXECUTE_FLAG_BITS_DO_NOT_EXECUTE) == 0) {
307 #if (RENDER_PERF_ENABLED == 1)
308                         auto& timerRef = params.nodeTimers[allNodeIdx++];
309                         timerRef.debugName = nodeStore.renderNodeData[nodeIdx].fullName;
310                         params.queue->Submit(
311                             taskId++, CreateFunctionTask([&timerRef, &renderNode, &renderCommandList]() {
312                                 RENDER_CPU_PERF_SCOPE("ExecuteRenderNodes", timerRef.debugName);
313                                 timerRef.timer.Begin();
314 
315                                 renderCommandList.BeforeRenderNodeExecuteFrame();
316                                 renderNode.ExecuteFrame(renderCommandList);
317                                 renderCommandList.AfterRenderNodeExecuteFrame();
318 
319                                 timerRef.timer.End();
320                             }));
321 #else
322                         params.queue->Submit(taskId++, CreateFunctionTask([&renderCommandList, &renderNode]() {
323                             renderCommandList.BeforeRenderNodeExecuteFrame();
324                             renderNode.ExecuteFrame(renderCommandList);
325                             renderCommandList.AfterRenderNodeExecuteFrame();
326                         }));
327 #endif
328                     }
329                 }
330             }
331         }
332     }
333 
334     // Execute and wait for completion.
335     params.queue->Execute();
336 }
337 
338 // Helper for Renderer::ExecuteRenderBackend
339 void IterateRenderBackendNodeGraphNodeStores(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
340     const bool multiQueueEnabled, RenderCommandFrameData& rcfd)
341 {
342     for (const RenderNodeGraphNodeStore* nodeStore : renderNodeGraphNodeStores) {
343         PLUGIN_ASSERT(nodeStore);
344         if (!nodeStore) {
345             continue;
346         }
347 
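        // Maps a node's index within this node store to its index in rcfd.renderCommandContexts (used for multi-queue patching below).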
348         unordered_map<uint32_t, uint32_t> nodeIdxToRenderCommandContextIdx;
349         const auto multiQueuePatchBeginIdx = (uint32_t)rcfd.renderCommandContexts.size();
350         uint32_t multiQueuePatchCount = 0;
351         if (multiQueueEnabled) {
352             nodeIdxToRenderCommandContextIdx.reserve(nodeStore->renderNodeContextData.size());
353         }
354 
355         for (size_t nodeIdx = 0; nodeIdx < nodeStore->renderNodeContextData.size(); ++nodeIdx) {
356             const auto& ref = nodeStore->renderNodeContextData[nodeIdx];
357             PLUGIN_ASSERT((ref.renderCommandList != nullptr) && (ref.renderBarrierList != nullptr) &&
358                           (ref.nodeContextPsoMgr != nullptr) && (ref.nodeContextPoolMgr != nullptr));
359             const bool valid = (ref.renderCommandList->HasValidRenderCommands());
360             if (valid) {
361                 if (multiQueueEnabled) {
362                     nodeIdxToRenderCommandContextIdx[(uint32_t)nodeIdx] = (uint32_t)rcfd.renderCommandContexts.size();
363                     multiQueuePatchCount++;
364                 }
365                 // get final backend node index of the first render node which uses the swapchain image
366                 const auto backendNodeIdx = static_cast<uint32_t>(rcfd.renderCommandContexts.size());
367                 if ((rcfd.firstSwapchainNodeIdx > backendNodeIdx) && (ref.submitInfo.waitForSwapchainAcquireSignal)) {
368                     rcfd.firstSwapchainNodeIdx = static_cast<uint32_t>(rcfd.renderCommandContexts.size());
369                 }
370                 rcfd.renderCommandContexts.push_back({ ref.renderBackendNode, ref.renderCommandList.get(),
371                     ref.renderBarrierList.get(), ref.nodeContextPsoMgr.get(), ref.nodeContextDescriptorSetMgr.get(),
372                     ref.nodeContextPoolMgr.get(), (uint32_t)nodeIdx, ref.submitInfo,
373                     nodeStore->renderNodeData[nodeIdx].fullName });
374             }
375         }
376 
377         if (multiQueueEnabled) { // patch correct render command context indices
378             for (uint32_t idx = multiQueuePatchBeginIdx; idx < (multiQueuePatchBeginIdx + multiQueuePatchCount); ++idx) {
379                 auto& ref = rcfd.renderCommandContexts[idx];
380                 const auto& nodeContextRef = nodeStore->renderNodeContextData[ref.renderGraphRenderNodeIndex];
381 
382                 ref.submitDepencies.signalSemaphore = nodeContextRef.submitInfo.signalSemaphore;
383                 ref.submitDepencies.waitSemaphoreCount = nodeContextRef.submitInfo.waitSemaphoreCount;
384                 for (uint32_t waitIdx = 0; waitIdx < ref.submitDepencies.waitSemaphoreCount; ++waitIdx) {
385                     const uint32_t currRenderNodeIdx = nodeContextRef.submitInfo.waitSemaphoreNodeIndices[waitIdx];
386                     PLUGIN_ASSERT(nodeIdxToRenderCommandContextIdx.count(currRenderNodeIdx) == 1);
387 
388                     ref.submitDepencies.waitSemaphoreNodeIndices[waitIdx] =
389                         nodeIdxToRenderCommandContextIdx[currRenderNodeIdx];
390                 }
391             }
392         }
393     }
394 }
395 
396 template<typename T>
397 inline bool IsNull(T* ptr)
398 {
399     return ptr == nullptr;
400 }
401 
402 inline int64_t GetTimeStampNow()
403 {
404     using namespace std::chrono;
405     using Clock = system_clock;
406     return Clock::now().time_since_epoch().count();
407 }
408 
409 void CreateDefaultRenderNodeGraphs(const Device& device, RenderNodeGraphManager& rngMgr,
410     RenderHandleReference& defaultStaging, RenderHandleReference& defaultEndFrameStaging)
411 {
412     {
413         RenderNodeGraphDesc rngd;
414         {
415             RenderNodeDesc rnd;
416             rnd.typeName = "CORE_RN_STAGING";
417             rnd.nodeName = "CORE_RN_STAGING_I";
418             rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
419             rngd.nodes.push_back(move(rnd));
420         }
421 #if (RENDER_VULKAN_RT_ENABLED == 1)
422         if (device.GetBackendType() == DeviceBackendType::VULKAN) {
423             RenderNodeDesc rnd;
424             rnd.typeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING";
425             rnd.nodeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING_I";
426             rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
427             rngd.nodes.push_back(move(rnd));
428         }
429 #endif
430         defaultStaging =
431             rngMgr.Create(IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
432     }
433     {
434         RenderNodeGraphDesc rngd;
435         {
436             RenderNodeDesc rnd;
437             rnd.typeName = "CORE_RN_END_FRAME_STAGING";
438             rnd.nodeName = "CORE_RN_END_FRAME_STAGING_I";
439             rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
440             rngd.nodes.push_back(move(rnd));
441         }
442         defaultEndFrameStaging =
443             rngMgr.Create(IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
444     }
445 }
446 } // namespace
447 
448 Renderer::Renderer(IRenderContext& context)
449     : renderContext_(context), device_(static_cast<Device&>(context.GetDevice())),
450       gpuResourceMgr_(static_cast<GpuResourceManager&>(device_.GetGpuResourceManager())),
451       shaderMgr_(static_cast<ShaderManager&>(device_.GetShaderManager())),
452       renderNodeGraphMgr_(static_cast<RenderNodeGraphManager&>(context.GetRenderNodeGraphManager())),
453       renderDataStoreMgr_(static_cast<RenderDataStoreManager&>(context.GetRenderDataStoreManager())),
454       renderUtil_(static_cast<RenderUtil&>(context.GetRenderUtil()))
455 
456 {
457     const RenderCreateInfo rci = ((const RenderContext&)renderContext_).GetCreateInfo();
458     if (rci.createFlags & RenderCreateInfo::CreateInfoFlagBits::CREATE_INFO_SEPARATE_RENDER_FRAME_BACKEND_BIT) {
459         separatedRendering_.separateBackend = true;
460     }
461     if (rci.createFlags & RenderCreateInfo::CreateInfoFlagBits::CREATE_INFO_SEPARATE_RENDER_FRAME_PRESENT_BIT) {
462         separatedRendering_.separatePresent = true;
463     }
464 
465     const auto factory = GetInstance<ITaskQueueFactory>(UID_TASK_QUEUE_FACTORY);
466     if (factory) {
467         const uint32_t threadCount = GetThreadPoolThreadCount(factory->GetNumberOfCores(), rci.threadPoolCreateInfo);
468         forceSequentialQueue_ = (threadCount <= 1U);
469         threadPool_ = factory->CreateThreadPool(threadCount);
470         parallelQueue_ = factory->CreateParallelTaskQueue(threadPool_);
471         sequentialQueue_ = factory->CreateSequentialTaskQueue(threadPool_);
472     }
473 
474     renderConfig_ = { device_.GetBackendType(), RenderingConfiguration::NdcOrigin::TOP_LEFT };
475 #if ((RENDER_HAS_GL_BACKEND) || (RENDER_HAS_GLES_BACKEND)) && (RENDER_GL_FLIP_Y_SWAPCHAIN == 0)
476     // The flag is for informative purposes only.
477     if ((renderConfig_.renderBackend == DeviceBackendType::OPENGL) ||
478         (renderConfig_.renderBackend == DeviceBackendType::OPENGLES)) {
479         renderConfig_.ndcOrigin = RenderingConfiguration::NdcOrigin::BOTTOM_LEFT;
480     }
481 #endif
482 
483     renderGraph_ = make_unique<RenderGraph>(device_);
484     renderBackend_ = device_.CreateRenderBackend(gpuResourceMgr_, forceSequentialQueue_
485                                                                       ? static_cast<ITaskQueue*>(sequentialQueue_.get())
486                                                                       : static_cast<ITaskQueue*>(parallelQueue_.get()));
487     renderFrameSync_ = device_.CreateRenderFrameSync();
488     rngGlobalShareDataMgr_ = make_unique<RenderNodeGraphGlobalShareDataManager>();
489 
490     CreateDefaultRenderNodeGraphs(device_, renderNodeGraphMgr_, defaultStagingRng_, defaultEndFrameStagingRng_);
491 
492     dsStaging_ = static_cast<IRenderDataStoreDefaultStaging*>(
493         renderDataStoreMgr_.GetRenderDataStore(RENDER_DATA_STORE_DEFAULT_STAGING).get());
494 }
495 
496 Renderer::~Renderer() = default;
497 
498 void Renderer::InitNodeGraphs(const array_view<const RenderHandle> renderNodeGraphs)
499 {
500     const RenderNodeGraphShareDataManager* prevRngShareDataMgr = nullptr;
501     for (const auto& rng : renderNodeGraphs) {
502         auto renderNodeDataStore = renderNodeGraphMgr_.Get(rng);
503         if (!renderNodeDataStore) {
504             continue;
505         }
506 
507         RenderNodeGraphNodeStore& nodeStore = *renderNodeDataStore;
508         if (nodeStore.initialized) {
509             continue;
510         }
511         nodeStore.initialized = true;
512 
513         const bool enableMultiQueue = (device_.GetGpuQueueCount() > 1);
514 
515         // serial, initialize render node context data
516         auto renderNodeNameToIndex =
517             InitializeRenderNodeContextData(renderContext_, nodeStore, enableMultiQueue, renderConfig_);
518 
519         if (enableMultiQueue) {
520             // patch gpu queue signaling
521             PatchSignaling(nodeStore, renderNodeNameToIndex);
522         }
523 
524         // NOTE: needs to be called once before init; on subsequent frames it is called in BeginRenderNodeGraph()
525         nodeStore.renderNodeGraphShareDataMgr->BeginFrame(rngGlobalShareDataMgr_.get(), prevRngShareDataMgr,
526             static_cast<uint32_t>(nodeStore.renderNodeData.size()),
527             { nodeStore.renderNodeGraphShareData.inputs, nodeStore.renderNodeGraphShareData.inputCount },
528             { nodeStore.renderNodeGraphShareData.outputs, nodeStore.renderNodeGraphShareData.outputCount });
529         prevRngShareDataMgr = nodeStore.renderNodeGraphShareDataMgr.get();
530 
531         const RenderNodeContextManager::PerFrameTimings timings { 0, 0, device_.GetFrameCount() };
532         for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
533             auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];
534             if (nodeContextData.initialized) {
535                 continue;
536             }
537             nodeContextData.initialized = true;
538 
539             // NOTE: needs to be called once before init; on subsequent frames it is called in BeginRenderNodeGraph()
540             nodeContextData.renderNodeContextManager->BeginFrame(static_cast<uint32_t>(nodeIdx), timings);
541 
542             auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
543             PLUGIN_ASSERT(renderNodeData.inputData);
544             PLUGIN_ASSERT(renderNodeData.node);
545 
546             RENDER_CPU_PERF_SCOPE("InitRenderNodes", renderNodeData.fullName);
547             renderNodeData.node->InitNode(*(nodeContextData.renderNodeContextManager));
548         }
549     }
550 }
551 
552 // Helper for Renderer::RenderFrame
553 void Renderer::RemapBackBufferHandle(const IRenderDataStoreManager& renderData)
554 {
555     const refcnt_ptr<IRenderDataStorePod> dataStorePod = renderData.GetRenderDataStore(RenderDataStorePod::TYPE_NAME);
556     if (dataStorePod) {
557         auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
558         const auto bb = reinterpret_cast<const NodeGraphBackBufferConfiguration*>(dataView.data());
559         if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) {
560             if (!device_.HasSwapchain()) {
561                 PLUGIN_LOG_E("Using swapchain rendering without swapchain");
562             }
563         } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE) {
564             const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
565             if (RenderHandleUtil::IsValid(handle) && RenderHandleUtil::IsValid(bb->backBufferHandle)) {
566                 gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle);
567             }
568         } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE_BUFFER_COPY) {
569             const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
570             if (RenderHandleUtil::IsValid(handle) && RenderHandleUtil::IsValid(bb->backBufferHandle) &&
571                 RenderHandleUtil::IsValid(bb->gpuBufferHandle)) {
572                 gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle);
573             }
574             // handle image to buffer copy via post frame staging
575             {
576                 RenderHandle backbufferHandle = bb->backBufferHandle;
577                 if (bb->backBufferName == DefaultEngineGpuResourceConstants::CORE_DEFAULT_BACKBUFFER) {
578                     // we need to use the core default backbuffer handle and not the replaced handle in this situation
579                     backbufferHandle =
580                         gpuResourceMgr_.GetImageHandle(DefaultEngineGpuResourceConstants::CORE_DEFAULT_BACKBUFFER)
581                             .GetHandle();
582                 }
583                 const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(backbufferHandle);
584                 const BufferImageCopy bic {
585                     0,                                                                // bufferOffset
586                     0,                                                                // bufferRowLength
587                     0,                                                                // bufferImageHeight
588                     ImageSubresourceLayers { CORE_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1u }, // imageSubresource
589                     Size3D { 0, 0, 0 },                                               // imageOffset
590                     Size3D { desc.width, desc.height, 1u },                           // imageExtent
591                 };
592                 dsStaging_->CopyImageToBuffer(gpuResourceMgr_.Get(backbufferHandle),
593                     gpuResourceMgr_.Get(bb->gpuBufferHandle), bic,
594                     IRenderDataStoreDefaultStaging::ResourceCopyInfo::END_FRAME);
595             }
596         }
597     }
598 }
599 
600 void Renderer::RenderFrameImpl(const array_view<const RenderHandle> renderNodeGraphs)
601 {
602     if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
603         separatedRendering_.frontMtx.lock();
604     }
605 
606     RENDER_CPU_PERF_BEGIN(renderFront, "RenderFrame", "Frontend");
607 
608     Tick();
609     frameTimes_.begin = GetTimeStampNow();
610 
611     if (!device_.GetDeviceStatus()) {
612         ProcessTimeStampEnd();
613 #if (RENDER_VALIDATION_ENABLED == 1)
614         PLUGIN_LOG_ONCE_E("invalid_device_status_render_frame", "invalid device for rendering");
615 #endif
616         return;
617     }
618     CORE_PROFILER_MARK_FRAME_START(FRAME_MARKER);
619 
620     renderDataStoreMgr_.CommitFrameData();
621 
622     device_.Activate();
623     device_.FrameStart();
624     renderFrameTimeData_.frameIndex = device_.GetFrameCount();
625 
626     (static_cast<GpuResourceCache&>(gpuResourceMgr_.GetGpuResourceCache())).BeginFrame(device_.GetFrameCount());
627 
628     // handle utils (needs to be called before render data store pre renders)
629     renderUtil_.BeginFrame();
630     // global descriptor set manager
631     device_.GetDescriptorSetManager().BeginFrame();
632 
633     // remap the default back buffer (needs to be called before render data store pre renders)
634     RemapBackBufferHandle(renderDataStoreMgr_);
635 
636     renderNodeGraphMgr_.HandlePendingAllocations();
637     renderDataStoreMgr_.PreRender();
638 
639     // create new shaders if any created this frame (needs to be called before render node init)
640     shaderMgr_.HandlePendingAllocations();
641 
642     auto& rngInputs = renderFrameTimeData_.rngInputs;
643     auto& rngNodeStores = renderFrameTimeData_.rngNodeStores;
644     PLUGIN_ASSERT(rngInputs.empty());
645     PLUGIN_ASSERT(rngNodeStores.empty());
646 
647     gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::UNDEFINED);
648     // update render node graphs with default staging
649     FillRngInputs(renderNodeGraphs, rngInputs);
650     const auto renderNodeGraphInputs = array_view(rngInputs.data(), rngInputs.size());
651 
652     InitNodeGraphs(renderNodeGraphInputs);
653     device_.Deactivate();
654 
655     renderGraph_->BeginFrame();
656 
657     FillRngNodeStores(renderNodeGraphInputs, renderNodeGraphMgr_, rngNodeStores);
658     if (std::any_of(rngNodeStores.begin(), rngNodeStores.end(), IsNull<RenderNodeGraphNodeStore>)) {
659         ProcessTimeStampEnd();
660         PLUGIN_LOG_W("invalid render node graphs for rendering");
661         return;
662     }
663 
664     // NodeContextPoolManagerGLES::BeginFrame may delete FBOs and device must be active.
665     device_.Activate();
666 
667     renderFrameSync_->BeginFrame();
668     // begin frame (advance ring buffers etc.)
669     const RenderNodeContextManager::PerFrameTimings timings { previousFrameTime_ - firstTime_, deltaTime_,
670         device_.GetFrameCount() };
671     BeginRenderNodeGraph(rngGlobalShareDataMgr_.get(), rngNodeStores, timings);
672 
673     // synchronize, needed for persistently mapped gpu buffer writing
674     if (!WaitForFence(device_, *renderFrameSync_)) {
675         device_.Deactivate();
676         return; // possible lost device with frame fence
677     }
678 
679     // gpu resource allocation and deallocation
680     gpuResourceMgr_.HandlePendingAllocations(true);
681 
682     device_.Deactivate();
683 
684     const auto nodeStoresView = array_view<RenderNodeGraphNodeStore*>(rngNodeStores);
685     ExecuteRenderNodes(nodeStoresView);
686 
687     // render graph process for all render nodes of all render graphs
688     ProcessRenderNodeGraph(device_, *renderGraph_, nodeStoresView);
689 
690     renderDataStoreMgr_.PostRender();
691 
692     // set front-end index (before mutexes)
693     renderStatus_.frontEndIndex = renderFrameTimeData_.frameIndex;
694     if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
695         separatedRendering_.frontMtx.unlock();
696     }
697     RENDER_CPU_PERF_END(renderFront);
698     if (!separatedRendering_.separateBackend) {
699         RenderFrameBackendImpl();
700     }
701 }
702 
703 void Renderer::RenderFrameBackendImpl()
704 {
705     if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
706         separatedRendering_.frontMtx.lock();
707         separatedRendering_.backMtx.lock();
708     }
709 
710     RENDER_CPU_PERF_BEGIN(renderBack, "RenderFrame", "Backend");
711 
712     auto& rngInputs = renderFrameTimeData_.rngInputs;
713     auto& rngNodeStores = renderFrameTimeData_.rngNodeStores;
714 
715     gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::RENDER_BACKEND);
716     device_.SetLockResourceBackendAccess(true);
717     renderDataStoreMgr_.PreRenderBackend();
718 
719     size_t allRenderNodeCount = 0;
720     for (const auto* nodeStore : rngNodeStores) {
721         PLUGIN_ASSERT(nodeStore);
722         if (nodeStore) {
723             allRenderNodeCount += nodeStore->renderNodeData.size();
724         }
725     }
726 
727     RenderCommandFrameData rcfd;
728     PLUGIN_ASSERT(renderFrameSync_);
729     rcfd.renderFrameSync = renderFrameSync_.get();
730     rcfd.renderFrameUtil = &(static_cast<RenderFrameUtil&>(renderContext_.GetRenderUtil().GetRenderFrameUtil()));
731     rcfd.renderCommandContexts.reserve(allRenderNodeCount);
732 
733     const bool multiQueueEnabled = (device_.GetGpuQueueCount() > 1u);
734     IterateRenderBackendNodeGraphNodeStores(rngNodeStores, multiQueueEnabled, rcfd);
735 
736     // NOTE: by node graph name
737     // NOTE: deprecate this
738     const RenderGraph::SwapchainStates bbState = renderGraph_->GetSwapchainResourceStates();
739     RenderBackendBackBufferConfiguration config;
740     for (const auto& swapState : bbState.swapchains) {
741         config.swapchainData.push_back({ swapState.handle, swapState.state, swapState.layout, {} });
742     }
743     if (!config.swapchainData.empty()) {
744         // NOTE: this is backwards compatibility for a single (default) swapchain's config data
745         // should be removed
746         if (const refcnt_ptr<IRenderDataStorePod> dataStorePod =
747                 renderDataStoreMgr_.GetRenderDataStore(RenderDataStorePod::TYPE_NAME)) {
748             auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
749             if (dataView.size_bytes() == sizeof(NodeGraphBackBufferConfiguration)) {
750                 // expects to be the first swapchain in the list
751                 const auto* bb = (const NodeGraphBackBufferConfiguration*)dataView.data();
752                 config.swapchainData[0U].config = *bb;
753             }
754         }
755     }
756     renderFrameTimeData_.config = config;
757     // must run backend if there are descriptor sets to update even if there's nothing to render.
758     renderFrameTimeData_.hasBackendWork = (!rcfd.renderCommandContexts.empty()) ||
759                                           (!device_.GetDescriptorSetManager().GetUpdateDescriptorSetHandles().empty());
760 
761     device_.Activate();
762 
763     if (renderFrameTimeData_.hasBackendWork) { // do not execute backend with zero work
764         device_.SetRenderBackendRunning(true);
765 
766         frameTimes_.beginBackend = GetTimeStampNow();
767         renderBackend_->Render(rcfd, config);
768         frameTimes_.endBackend = GetTimeStampNow();
769 
770         device_.SetRenderBackendRunning(false);
771     }
772     gpuResourceMgr_.EndFrame();
773 
774     if (separatedRendering_.separatePresent) {
775         device_.Deactivate();
776     }
777 
778     device_.SetLockResourceBackendAccess(false);
779 
780     // clear
781     rngInputs.clear();
782     rngNodeStores.clear();
783 
784     // set backend-end index (before mutexes)
785     renderStatus_.backEndIndex = renderStatus_.frontEndIndex;
786     if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
787         separatedRendering_.frontMtx.unlock();
788         separatedRendering_.backMtx.unlock();
789     }
790     RENDER_CPU_PERF_END(renderBack);
791     if (!separatedRendering_.separatePresent) {
792         RenderFramePresentImpl();
793     }
794 }
795 
796 void Renderer::RenderFramePresentImpl()
797 {
798     if (separatedRendering_.separatePresent) {
799         separatedRendering_.backMtx.lock();
800     }
801 
802     RENDER_CPU_PERF_SCOPE("RenderFrame", "Presentation");
803 
804     if (renderFrameTimeData_.hasBackendWork) { // do not execute backend with zero work
805         if (separatedRendering_.separatePresent) {
806             device_.Activate();
807         }
808 
809         frameTimes_.beginBackendPresent = GetTimeStampNow();
810         renderBackend_->Present(renderFrameTimeData_.config);
811         frameTimes_.endBackendPresent = GetTimeStampNow();
812 
813         if (separatedRendering_.separatePresent) {
814             device_.Deactivate();
815         }
816     }
817     if (!separatedRendering_.separatePresent) {
818         device_.Deactivate();
819     }
820 
821     renderDataStoreMgr_.PostRenderBackend();
822 
823     renderFrameTimeData_.config = {};
824 
825     // needs to be called after render data store post render
826     renderUtil_.EndFrame();
827 
828     // RenderFramePresentImpl() needs to be called every frame even when there is nothing to present
829     device_.FrameEnd();
830     ProcessTimeStampEnd();
831     CORE_PROFILER_MARK_FRAME_END(FRAME_MARKER);
832 
833     CORE_PROFILER_MARK_GLOBAL_FRAME_CHANGED();
834     // set presentation index (before mutexes)
835     renderStatus_.presentIndex = renderStatus_.backEndIndex;
836     if (separatedRendering_.separatePresent) {
837         separatedRendering_.backMtx.unlock();
838     }
839 }
840 
841 uint64_t Renderer::RenderFrame(const array_view<const RenderHandleReference> renderNodeGraphs)
842 {
843     const auto lock = std::lock_guard(renderMutex_);
844 
845     // add only unique and valid handles to list for rendering
846     vector<RenderHandle> rngs;
847     rngs.reserve(renderNodeGraphs.size());
848     for (size_t iIdx = 0; iIdx < renderNodeGraphs.size(); ++iIdx) {
849         const RenderHandle& handle = renderNodeGraphs[iIdx].GetHandle();
850         bool duplicate = false;
851         for (auto& ref : rngs) {
852             if (ref == handle) {
853                 duplicate = true;
854             }
855         }
856         if ((RenderHandleUtil::GetHandleType(handle) == RenderHandleType::RENDER_NODE_GRAPH) && (!duplicate)) {
857             rngs.push_back(handle);
858         }
859 #if (RENDER_VALIDATION_ENABLED == 1)
860         if (duplicate) {
861             PLUGIN_LOG_ONCE_E("renderer_rf_duplicate_rng",
862                 "RENDER_VALIDATION: duplicate render node graphs are not supported (idx: %u, id: %" PRIx64 ")",
863                 static_cast<uint32_t>(iIdx), handle.id);
864         }
865 #endif
866     }
867     device_.SetRenderFrameRunning(true);
868     // NOTE: this is the only place from where RenderFrameImpl is called
869     RenderFrameImpl(rngs);
870     device_.SetRenderFrameRunning(false);
871 
872     return renderStatus_.frontEndIndex;
873 }
874 
875 uint64_t Renderer::RenderDeferred(const array_view<const RenderHandleReference> renderNodeGraphs)
876 {
877     const auto lock = std::lock_guard(deferredMutex_);
878     for (const auto& ref : renderNodeGraphs) {
879         deferredRenderNodeGraphs_.push_back(ref);
880     }
881     return renderStatusDeferred_ + 1;
882 }
883 
884 uint64_t Renderer::RenderDeferredFrame()
885 {
886     deferredMutex_.lock();
887     decltype(deferredRenderNodeGraphs_) renderNodeGraphs = move(deferredRenderNodeGraphs_);
888     renderStatusDeferred_ = renderStatus_.frontEndIndex + 1;
889     deferredMutex_.unlock();
890     RenderFrame(renderNodeGraphs);
891 
892     return renderStatus_.frontEndIndex;
893 }
894 
895 void Renderer::ExecuteRenderNodes(const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
896 {
897 #if (RENDER_PERF_ENABLED == 1)
898     RENDER_CPU_PERF_BEGIN(fullExecuteCpuTimer, "RenderFrame", "ExecuteAllNodes");
899 
900     size_t allRenderNodeCount = 0;
901     for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
902         allRenderNodeCount += renderNodeGraphNodeStores[graphIdx]->renderNodeData.size();
903     }
904 
905     vector<NodeTimerData> nodeTimers(allRenderNodeCount);
906 #endif
907 
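    // Prefer the parallel task queue; fall back to the sequential queue when threaded processing is disabled or not allowed.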
908     ITaskQueue* queue = nullptr;
909     if ((!forceSequentialQueue_) && device_.AllowThreadedProcessing()) {
910         queue = parallelQueue_.get();
911     } else {
912         queue = sequentialQueue_.get();
913     }
914     if (!queue) {
915         return; // fatal
916     }
917 
918     gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::RENDER_PRE_EXECUTE);
919     // single threaded gpu resource creation with render nodes
920     RenderNodePreExecution(renderNodeGraphNodeStores);
921 
922     // lock staging data for this frame
923     // NOTE: should be done with double buffering earlier
924     gpuResourceMgr_.LockFrameStagingData();
925     // final gpu resource allocation and deallocation before render node execute
926     device_.Activate();
927     gpuResourceMgr_.HandlePendingAllocations(true);
928     gpuResourceMgr_.MapRenderTimeGpuBuffers();
929     device_.Deactivate();
930 
931     // process render node graph render node share preparations
932     for (auto& ref : renderNodeGraphNodeStores) {
933         ref->renderNodeGraphShareDataMgr->PrepareExecuteFrame();
934     }
935 
936     // lock global descriptor set creation
937     device_.GetDescriptorSetManager().LockFrameCreation();
938     gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::RENDER_EXECUTE);
939 
940     RenderNodeExecutionParameters params = {
941         renderNodeGraphNodeStores,
942 #if (RENDER_PERF_ENABLED == 1)
943         nodeTimers,
944 #endif
945         queue,
946         renderDataStoreMgr_,
947         shaderMgr_,
948     };
949 
950     // multi-threaded render node execution
951     RenderNodeExecution(params);
952 
953     // Remove tasks.
954     queue->Clear();
955 
956     // final gpu resource allocation before render graph
957     device_.Activate();
958     gpuResourceMgr_.UnmapRenderTimeGpuBuffers();
959     // do not allow destruction here
960     gpuResourceMgr_.HandlePendingAllocations(false);
961     device_.Deactivate();
962 
963 #if (RENDER_PERF_ENABLED == 1)
964     RENDER_CPU_PERF_END(fullExecuteCpuTimer);
965 
966     if (auto* inst = GetInstance<IPerformanceDataManagerFactory>(UID_PERFORMANCE_FACTORY); inst) {
967         if (IPerformanceDataManager* perfData = inst->Get("RenderNode"); perfData) {
968             for (size_t nodeIdx = 0; nodeIdx < nodeTimers.size(); ++nodeIdx) {
969                 const auto& timerRef = nodeTimers[nodeIdx];
970                 perfData->UpdateData(timerRef.debugName, "RenderNodeExecute_Cpu", timerRef.timer.GetMicroseconds());
971             }
972         }
973     }
974 #endif
975 }
976 
977 uint64_t Renderer::RenderFrameBackend(const RenderFrameBackendInfo& info)
978 {
979     if (separatedRendering_.separateBackend) {
980         RenderFrameBackendImpl();
981     } else {
982         PLUGIN_LOG_E("RenderFrameBackend called separately even though render context not created as separate");
983     }
984 
985     return renderStatus_.backEndIndex;
986 }
987 
988 uint64_t Renderer::RenderFramePresent(const RenderFramePresentInfo& info)
989 {
990     if (separatedRendering_.separatePresent) {
991         RenderFramePresentImpl();
992     } else {
993         PLUGIN_LOG_E("RenderFramePresent called separately even though render context not created as separate");
994     }
995 
996     return renderStatus_.presentIndex;
997 }
998 
999 IRenderer::RenderStatus Renderer::GetFrameStatus() const
1000 {
1001     return renderStatus_;
1002 }
1003 
1004 void Renderer::FillRngInputs(
1005     const array_view<const RenderHandle> renderNodeGraphInputList, vector<RenderHandle>& rngInputs)
1006 {
1007     constexpr size_t defaultRenderNodeGraphCount = 2;
1008     rngInputs.reserve(renderNodeGraphInputList.size() + defaultRenderNodeGraphCount);
1009     rngInputs.push_back(defaultStagingRng_.GetHandle());
1010     rngInputs.append(renderNodeGraphInputList.begin().ptr(), renderNodeGraphInputList.end().ptr());
1011     rngInputs.push_back(defaultEndFrameStagingRng_.GetHandle());
1012 }
1013 
1014 void Renderer::ProcessTimeStampEnd()
1015 {
1016     frameTimes_.end = GetTimeStampNow();
1017 
1018     int64_t finalTime = frameTimes_.begin;
1019     finalTime = Math::max(finalTime, frameTimes_.beginBackend);
1020     frameTimes_.beginBackend = finalTime;
1021 
1022     finalTime = Math::max(finalTime, frameTimes_.endBackend);
1023     frameTimes_.endBackend = finalTime;
1024 
1025     finalTime = Math::max(finalTime, frameTimes_.beginBackendPresent);
1026     frameTimes_.beginBackendPresent = finalTime;
1027 
1028     finalTime = Math::max(finalTime, frameTimes_.endBackendPresent);
1029     frameTimes_.endBackendPresent = finalTime;
1030 
1031     finalTime = Math::max(finalTime, frameTimes_.end);
1032     frameTimes_.end = finalTime;
1033 
1034     PLUGIN_ASSERT(frameTimes_.end >= frameTimes_.endBackend);
1035     PLUGIN_ASSERT(frameTimes_.endBackend >= frameTimes_.beginBackend);
1036     PLUGIN_ASSERT(frameTimes_.beginBackendPresent >= frameTimes_.beginBackend);
1037     PLUGIN_ASSERT(frameTimes_.endBackendPresent >= frameTimes_.beginBackendPresent);
1038 
1039     renderUtil_.SetRenderTimings(frameTimes_);
1040     frameTimes_ = {};
1041 }
1042 
1043 void Renderer::Tick()
1044 {
1045     using namespace std::chrono;
1046     const auto currentTime =
1047         static_cast<uint64_t>(duration_cast<microseconds>(high_resolution_clock::now().time_since_epoch()).count());
1048 
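    // firstTime_ == ~0u marks the first Tick(); both timestamps snap to the current time so the first delta is zero.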
1049     if (firstTime_ == ~0u) {
1050         previousFrameTime_ = firstTime_ = currentTime;
1051     }
1052     deltaTime_ = currentTime - previousFrameTime_;
1053     constexpr auto limitHz = duration_cast<microseconds>(duration<float, std::ratio<1, 15u>>(1)).count();
1054     if (deltaTime_ > limitHz) {
1055         deltaTime_ = limitHz; // clamp the time step to the 15 Hz frame period at most.
1056     }
1057     previousFrameTime_ = currentTime;
1058 }
1059 RENDER_END_NAMESPACE()
1060