1 /*
2 * Copyright (C) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "renderer.h"
17
18 #include <algorithm>
19 #include <chrono>
20 #include <functional>
21 #include <utility>
22
23 #include <base/containers/string.h>
24 #include <base/containers/string_view.h>
25 #include <base/containers/unordered_map.h>
26 #include <base/containers/vector.h>
27 #include <core/perf/intf_performance_data_manager.h>
28 #include <render/datastore/intf_render_data_store_manager.h>
29 #include <render/datastore/intf_render_data_store_pod.h>
30 #include <render/datastore/render_data_store_render_pods.h>
31 #include <render/intf_render_context.h>
32 #include <render/intf_renderer.h>
33 #include <render/namespace.h>
34 #include <render/nodecontext/intf_render_node.h>
35 #include <render/render_data_structures.h>
36
37 #include "perf/cpu_perf_scope.h"
38
39 #if (RENDER_DEV_ENABLED == 1)
40 #include <cinttypes>
41 #endif
42
43 #include "datastore/render_data_store_manager.h"
44 #include "device/device.h"
45 #include "device/gpu_resource_manager.h"
46 #include "device/gpu_resource_util.h"
47 #include "device/render_frame_sync.h"
48 #include "device/shader_manager.h"
49 #include "nodecontext/node_context_descriptor_set_manager.h"
50 #include "nodecontext/node_context_pso_manager.h"
51 #include "nodecontext/render_node_context_manager.h"
52 #include "nodecontext/render_node_graph_manager.h"
53 #include "nodecontext/render_node_graph_node_store.h"
54 #include "perf/cpu_timer.h"
55 #include "render_backend.h"
56 #include "render_graph.h"
57 #include "util/log.h"
58 #include "util/render_util.h"
59
60 using namespace BASE_NS;
61 using namespace CORE_NS;
62
63 RENDER_BEGIN_NAMESPACE()
64 namespace {
65 // Helper class for running std::function as a ThreadPool task.
66 class FunctionTask final : public IThreadPool::ITask {
67 public:
Create(std::function<void ()> func)68 static Ptr Create(std::function<void()> func)
69 {
70 return Ptr { new FunctionTask(func) };
71 }
72
FunctionTask(std::function<void ()> func)73 explicit FunctionTask(std::function<void()> func) : func_(func) {};
74
operator ()()75 void operator()() override
76 {
77 func_();
78 }
79
80 protected:
Destroy()81 void Destroy() override
82 {
83 delete this;
84 }
85
86 private:
87 std::function<void()> func_;
88 };
89
#if (RENDER_PERF_ENABLED == 1)
// CPU timing record of a single render node. Only compiled when RENDER_PERF_ENABLED is 1.
struct NodeTimerData {
    // Timer that is started and stopped around the render node execution task.
    CpuTimer timer;
    // Non-owning view to the name used when the timing is reported.
    string_view debugName;
};
#endif
96
97 struct RenderNodeExecutionParameters {
98 const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores;
99 #if (RENDER_PERF_ENABLED == 1)
100 vector<NodeTimerData>& nodeTimers;
101 #endif
102 ITaskQueue* queue;
103 IRenderDataStoreManager& renderData;
104 ShaderManager& shaderManager;
105 RenderingConfiguration& renderConfig;
106 };
107
ProcessShaderReload(Device & device,ShaderManager & shaderMgr,RenderNodeGraphManager & renderNodeGraphMgr,const array_view<const RenderHandle> & renderNodeGraphs)108 void ProcessShaderReload(Device& device, ShaderManager& shaderMgr, RenderNodeGraphManager& renderNodeGraphMgr,
109 const array_view<const RenderHandle>& renderNodeGraphs)
110 {
111 if (shaderMgr.HasReloadedShaders()) {
112 device.WaitForIdle();
113 // NOTE: would be better to force pso re-creation based on low-level handle, but cannot be done at the moment
114 PLUGIN_LOG_I("RENDER_PERFORMANCE_WARNING: re-init render nodes because of reloaded shaders");
115 for (const auto& ref : renderNodeGraphs) {
116 RenderNodeGraphNodeStore* nodeStore = renderNodeGraphMgr.Get(ref);
117 if (nodeStore) {
118 nodeStore->initialized = false;
119 for (auto& nodeContextRef : nodeStore->renderNodeContextData) {
120 nodeContextRef.initialized = false; // re-init all nodes
121 }
122 }
123 }
124 }
125 }
126
CreateBackBufferGpuBufferRenderNodeGraph(RenderNodeGraphManager & renderNodeGraphMgr)127 RenderHandleReference CreateBackBufferGpuBufferRenderNodeGraph(RenderNodeGraphManager& renderNodeGraphMgr)
128 {
129 RenderNodeGraphDesc rngd;
130 rngd.renderNodeGraphName = "CORE_RNG_BACKBUFFER_GPUBUFFER";
131 RenderNodeDesc rnd;
132 rnd.typeName = "CORE_RN_BACKBUFFER_GPUBUFFER";
133 rnd.nodeName = "CORE_RN_BACKBUFFER_GPUBUFFER_I";
134 rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
135 rngd.nodes.emplace_back(move(rnd));
136
137 return renderNodeGraphMgr.Create(IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
138 }
139
140 // Helper for Renderer::InitNodeGraph
InitializeRenderNodeContextData(IRenderContext & renderContext,RenderNodeGraphNodeStore & nodeStore,const bool enableMultiQueue,const RenderingConfiguration & renderConfig)141 unordered_map<string, uint32_t> InitializeRenderNodeContextData(IRenderContext& renderContext,
142 RenderNodeGraphNodeStore& nodeStore, const bool enableMultiQueue, const RenderingConfiguration& renderConfig)
143 {
144 unordered_map<string, uint32_t> renderNodeNameToIndex(nodeStore.renderNodeData.size());
145 vector<ContextInitDescription> contextInitDescs(nodeStore.renderNodeData.size());
146 for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
147 const auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
148 PLUGIN_ASSERT(renderNodeData.inputData);
149 PLUGIN_ASSERT(renderNodeData.node);
150 auto& inputData = *(renderNodeData.inputData);
151 auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];
152
153 renderNodeNameToIndex[renderNodeData.fullName] = (uint32_t)nodeIdx;
154
155 // reset always, dependencies are redone with new nodes
156 nodeContextData.submitInfo.signalSemaphore = false;
157 nodeContextData.submitInfo.waitSemaphoreCount = 0;
158 nodeContextData.submitInfo.waitForSwapchainAcquireSignal = false;
159
160 // with dynamic render node graphs, single nodes can be initialized
161 // set to true when doing the renderNode->InitNode();
162 if (nodeContextData.initialized) {
163 continue;
164 }
165
166 auto& contextInitRef = contextInitDescs[nodeIdx];
167 contextInitRef.requestedQueue = inputData.queue;
168
169 Device& device = (Device&)renderContext.GetDevice();
170 contextInitRef.requestedQueue = device.GetValidGpuQueue(contextInitRef.requestedQueue);
171
172 ShaderManager& shaderMgr = (ShaderManager&)renderContext.GetDevice().GetShaderManager();
173 GpuResourceManager& gpuResourceMgr = (GpuResourceManager&)renderContext.GetDevice().GetGpuResourceManager();
174 // ordering is important
175 nodeContextData.nodeContextPsoMgr = make_unique<NodeContextPsoManager>(device, shaderMgr);
176 nodeContextData.nodeContextDescriptorSetMgr = device.CreateNodeContextDescriptorSetManager();
177 nodeContextData.renderCommandList = make_unique<RenderCommandList>(*nodeContextData.nodeContextDescriptorSetMgr,
178 gpuResourceMgr, *nodeContextData.nodeContextPsoMgr, contextInitRef.requestedQueue, enableMultiQueue);
179 nodeContextData.contextPoolMgr =
180 device.CreateNodeContextPoolManager(gpuResourceMgr, contextInitRef.requestedQueue);
181 RenderNodeGraphData rngd = { nodeStore.renderNodeGraphName, nodeStore.renderNodeGraphDataStoreName,
182 renderConfig };
183 RenderNodeContextManager::CreateInfo rncmci { renderContext, rngd, *renderNodeData.inputData,
184 renderNodeData.nodeName, renderNodeData.nodeJson, *nodeStore.renderNodeGpuResourceMgr,
185 *nodeContextData.nodeContextDescriptorSetMgr, *nodeContextData.nodeContextPsoMgr,
186 *nodeContextData.renderCommandList, *nodeStore.renderNodeGraphShareDataMgr };
187 nodeContextData.renderNodeContextManager = make_unique<RenderNodeContextManager>(rncmci);
188 #if ((RENDER_VALIDATION_ENABLED == 1) || (RENDER_VULKAN_VALIDATION_ENABLED == 1))
189 nodeContextData.nodeContextDescriptorSetMgr->SetValidationDebugName(renderNodeData.fullName);
190 nodeContextData.contextPoolMgr->SetValidationDebugName(renderNodeData.fullName);
191 #endif
192 nodeContextData.renderBarrierList = make_unique<RenderBarrierList>(
193 (contextInitRef.requestedQueue.type != GpuQueue::QueueType::UNDEFINED) ? 4u : 0u);
194 }
195 return renderNodeNameToIndex;
196 }
197
198 // Helper for Renderer::InitNodeGraph
PatchSignaling(RenderNodeGraphNodeStore & nodeStore,const unordered_map<string,uint32_t> & renderNodeNameToIndex)199 void PatchSignaling(RenderNodeGraphNodeStore& nodeStore, const unordered_map<string, uint32_t>& renderNodeNameToIndex)
200 {
201 PLUGIN_ASSERT(renderNodeNameToIndex.size() == nodeStore.renderNodeData.size());
202 for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
203 PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].inputData);
204 const auto& nodeInputDataRef = *(nodeStore.renderNodeData[nodeIdx].inputData);
205 auto& submitInfo = nodeStore.renderNodeContextData[nodeIdx].submitInfo;
206
207 for (const auto& nodeNameRef : nodeInputDataRef.gpuQueueWaitForSignals.nodeNames) {
208 if (const auto iter = renderNodeNameToIndex.find(nodeNameRef); iter != renderNodeNameToIndex.cend()) {
209 if (submitInfo.waitSemaphoreCount < PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS) {
210 const uint32_t index = iter->second;
211 // mark node to signal
212 nodeStore.renderNodeContextData[index].submitInfo.signalSemaphore = true;
213
214 submitInfo.waitSemaphoreNodeIndices[submitInfo.waitSemaphoreCount] = index;
215 submitInfo.waitSemaphoreCount++;
216 } else {
217 PLUGIN_LOG_E("render node can wait only for (%u) render node signals",
218 PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS);
219 PLUGIN_ASSERT(false);
220 }
221 } else {
222 PLUGIN_LOG_E("invalid render node wait signal dependency");
223 PLUGIN_ASSERT(false);
224 }
225 }
226 }
227 }
228
229 // Helper for Renderer::RenderFrame
BeginRenderNodeGraph(const vector<RenderNodeGraphNodeStore * > & renderNodeGraphNodeStores,const RenderNodeContextManager::PerFrameTimings & timings)230 void BeginRenderNodeGraph(const vector<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
231 const RenderNodeContextManager::PerFrameTimings& timings)
232 {
233 for (const RenderNodeGraphNodeStore* renderNodeDataStore : renderNodeGraphNodeStores) {
234 const uint32_t renderNodeCount = static_cast<uint32_t>(renderNodeDataStore->renderNodeContextData.size());
235 auto& rngShareData = renderNodeDataStore->renderNodeGraphShareData;
236 renderNodeDataStore->renderNodeGraphShareDataMgr->BeginFrame(renderNodeCount,
237 { rngShareData.inputs, rngShareData.inputCount }, { rngShareData.outputs, rngShareData.outputCount });
238 for (uint32_t idx = 0; idx < renderNodeCount; ++idx) {
239 const RenderNodeContextData& contextData = renderNodeDataStore->renderNodeContextData[idx];
240 contextData.renderCommandList->BeginFrame();
241 contextData.renderBarrierList->BeginFrame();
242 contextData.contextPoolMgr->BeginFrame();
243 contextData.nodeContextDescriptorSetMgr->BeginFrame();
244 contextData.renderNodeContextManager->BeginFrame(idx, timings);
245 }
246 }
247 }
248
249 // Helper for Renderer::RenderFrame
GetRenderNodeGraphNodeStores(array_view<const RenderHandle> inputs,RenderNodeGraphManager & renderNodeGraphMgr)250 inline vector<RenderNodeGraphNodeStore*> GetRenderNodeGraphNodeStores(
251 array_view<const RenderHandle> inputs, RenderNodeGraphManager& renderNodeGraphMgr)
252 {
253 vector<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores;
254 renderNodeGraphNodeStores.reserve(inputs.size());
255 for (auto const& input : inputs) {
256 renderNodeGraphNodeStores.emplace_back(renderNodeGraphMgr.Get(input));
257 }
258 return renderNodeGraphNodeStores;
259 }
260
261 // Helper for Renderer::RenderFrame
WaitForFence(const Device & device,RenderFrameSync & renderFrameSync)262 inline bool WaitForFence(const Device& device, RenderFrameSync& renderFrameSync)
263 {
264 RENDER_CPU_PERF_SCOPE("Renderer", "Renderer", "WaitForFrameFence_Cpu");
265 renderFrameSync.WaitForFrameFence();
266
267 return device.GetDeviceStatus();
268 }
269
270 // Helper for Renderer::RenderFrame
ProcessRenderNodeGraph(const Device & device,RenderGraph & renderGraph,array_view<RenderNodeGraphNodeStore * > graphNodeStoreView)271 inline void ProcessRenderNodeGraph(
272 const Device& device, RenderGraph& renderGraph, array_view<RenderNodeGraphNodeStore*> graphNodeStoreView)
273 {
274 RENDER_CPU_PERF_SCOPE("Renderer", "Renderer", "RenderGraph_Cpu");
275 const RenderHandle backbufferHandle = device.GetBackbufferHandle();
276 renderGraph.ProcessRenderNodeGraph(backbufferHandle, graphNodeStoreView);
277 }
278
279 // Helper for Renderer::ExecuteRenderNodes
CreateGpuResourcesWithRenderNodes(const array_view<RenderNodeGraphNodeStore * > & renderNodeGraphNodeStores,IRenderDataStoreManager & renderData,ShaderManager & shaderMgr)280 void CreateGpuResourcesWithRenderNodes(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
281 IRenderDataStoreManager& renderData, ShaderManager& shaderMgr)
282 {
283 for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
284 PLUGIN_ASSERT(renderNodeGraphNodeStores[graphIdx]);
285
286 RenderNodeGraphNodeStore const& nodeStore = *renderNodeGraphNodeStores[graphIdx];
287 for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
288 IRenderNode& renderNode = *(nodeStore.renderNodeData[nodeIdx].node);
289 renderNode.PreExecuteFrame();
290 }
291 }
292 }
293
294 // Helper for Renderer::ExecuteRenderNodes
RenderNodeExecution(RenderNodeExecutionParameters & params)295 void RenderNodeExecution(RenderNodeExecutionParameters& params)
296 {
297 #if (RENDER_PERF_ENABLED == 1)
298 size_t allNodeIdx = 0;
299 #endif
300 uint64_t taskId = 0;
301 for (const auto* nodeStorePtr : params.renderNodeGraphNodeStores) {
302 PLUGIN_ASSERT(nodeStorePtr);
303 const auto& nodeStore = *nodeStorePtr;
304
305 for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
306 PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].node);
307 IRenderNode& renderNode = *(nodeStore.renderNodeData[nodeIdx].node);
308 RenderNodeContextData const& renderNodeContextData = nodeStore.renderNodeContextData[nodeIdx];
309 RenderCommandList& renderCommandList = *renderNodeContextData.renderCommandList;
310
311 #if (RENDER_PERF_ENABLED == 1)
312 auto& timerRef = params.nodeTimers[allNodeIdx];
313 timerRef.debugName = nodeStore.renderNodeData[nodeIdx].fullName;
314 params.queue->Submit(taskId++, FunctionTask::Create([&timerRef, &renderNode, &renderCommandList]() {
315 timerRef.timer.Begin();
316
317 renderCommandList.BeforeRenderNodeExecuteFrame();
318 renderNode.ExecuteFrame(renderCommandList);
319 renderCommandList.AfterRenderNodeExecuteFrame();
320
321 timerRef.timer.End();
322 }));
323 allNodeIdx++;
324 #else
325 params.queue->Submit(taskId++, FunctionTask::Create([&renderCommandList, &renderNode]() {
326 renderCommandList.BeforeRenderNodeExecuteFrame();
327 renderNode.ExecuteFrame(renderCommandList);
328 renderCommandList.AfterRenderNodeExecuteFrame();
329 }));
330 #endif
331 }
332 }
333
334 // Execute and wait for completion.
335 params.queue->Execute();
336 }
337
338 // Helper for Renderer::ExecuteRenderBackend
IterateRenderBackendNodeGraphNodeStores(const array_view<RenderNodeGraphNodeStore * > & renderNodeGraphNodeStores,RenderCommandFrameData & rcfd,const bool & multiQueueEnabled)339 void IterateRenderBackendNodeGraphNodeStores(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
340 RenderCommandFrameData& rcfd, const bool& multiQueueEnabled)
341 {
342 for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
343 PLUGIN_ASSERT(renderNodeGraphNodeStores[graphIdx]);
344
345 RenderNodeGraphNodeStore const& nodeStore = *renderNodeGraphNodeStores[graphIdx];
346
347 unordered_map<uint32_t, uint32_t> nodeIdxToRenderCommandContextIdx;
348 const uint32_t multiQueuePatchBeginIdx = (uint32_t)rcfd.renderCommandContexts.size();
349 uint32_t multiQueuePatchCount = 0;
350 if (multiQueueEnabled) {
351 nodeIdxToRenderCommandContextIdx.reserve(nodeStore.renderNodeContextData.size());
352 }
353
354 for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeContextData.size(); ++nodeIdx) {
355 const auto& ref = nodeStore.renderNodeContextData[nodeIdx];
356 PLUGIN_ASSERT((ref.renderCommandList != nullptr) && (ref.renderBarrierList != nullptr) &&
357 (ref.nodeContextPsoMgr != nullptr) && (ref.contextPoolMgr != nullptr));
358 const bool valid = (ref.renderCommandList->HasValidRenderCommands()) ? true : false;
359 if (valid) {
360 if (multiQueueEnabled) {
361 nodeIdxToRenderCommandContextIdx[(uint32_t)nodeIdx] = (uint32_t)rcfd.renderCommandContexts.size();
362 multiQueuePatchCount++;
363 }
364
365 rcfd.renderCommandContexts.push_back({ ref.renderBackendNode, ref.renderCommandList.get(),
366 ref.renderBarrierList.get(), ref.nodeContextPsoMgr.get(), ref.nodeContextDescriptorSetMgr.get(),
367 ref.contextPoolMgr.get(), ref.renderCommandList->HasMultiRenderCommandListSubpasses(),
368 ref.renderCommandList->GetMultiRenderCommandListSubpassCount(), (uint32_t)nodeIdx, ref.submitInfo,
369 nodeStore.renderNodeData[nodeIdx].fullName });
370 }
371 }
372
373 if (multiQueueEnabled) { // patch correct render command context indices
374 for (uint32_t idx = multiQueuePatchBeginIdx; idx < multiQueuePatchCount; ++idx) {
375 auto& ref = rcfd.renderCommandContexts[idx];
376 const auto& nodeContextRef = nodeStore.renderNodeContextData[ref.renderGraphRenderNodeIndex];
377
378 ref.submitDepencies.signalSemaphore = nodeContextRef.submitInfo.signalSemaphore;
379 ref.submitDepencies.waitSemaphoreCount = nodeContextRef.submitInfo.waitSemaphoreCount;
380 for (uint32_t waitIdx = 0; waitIdx < ref.submitDepencies.waitSemaphoreCount; ++waitIdx) {
381 const uint32_t currRenderNodeIdx = nodeContextRef.submitInfo.waitSemaphoreNodeIndices[waitIdx];
382 PLUGIN_ASSERT(nodeIdxToRenderCommandContextIdx.count(currRenderNodeIdx) == 1);
383
384 ref.submitDepencies.waitSemaphoreNodeIndices[waitIdx] =
385 nodeIdxToRenderCommandContextIdx[currRenderNodeIdx];
386 }
387 }
388 }
389 }
390 }
391
// Predicate: true when ptr is a null pointer.
template<typename T>
inline bool IsNull(T* ptr)
{
    return !ptr;
}
397
// Returns the current std::chrono::system_clock time since its epoch as a raw tick count.
// The length of a tick is the one of system_clock::duration.
inline int64_t GetTimeStampNow()
{
    const auto sinceEpoch = std::chrono::system_clock::now().time_since_epoch();
    return sinceEpoch.count();
}
404 } // namespace
405
Renderer(IRenderContext & context)406 Renderer::Renderer(IRenderContext& context)
407 : renderContext_(context), device_(static_cast<Device&>(context.GetDevice())),
408 gpuResourceMgr_(static_cast<GpuResourceManager&>(device_.GetGpuResourceManager())),
409 shaderMgr_(static_cast<ShaderManager&>(device_.GetShaderManager())),
410 renderNodeGraphMgr_(static_cast<RenderNodeGraphManager&>(context.GetRenderNodeGraphManager())),
411 renderDataStoreMgr_(static_cast<RenderDataStoreManager&>(context.GetRenderDataStoreManager())),
412 renderUtil_(static_cast<RenderUtil&>(context.GetRenderUtil()))
413
414 {
415 const auto factory = GetInstance<ITaskQueueFactory>(UID_TASK_QUEUE_FACTORY);
416 threadPool_ = factory->CreateThreadPool(factory->GetNumberOfCores());
417 parallelQueue_ = factory->CreateParallelTaskQueue(threadPool_);
418 sequentialQueue_ = factory->CreateSequentialTaskQueue(threadPool_);
419
420 renderConfig_ = { device_.GetBackendType(), RenderingConfiguration::NdcOrigin::TOP_LEFT };
421 #if ((RENDER_HAS_GL_BACKEND) || (RENDER_HAS_GLES_BACKEND)) && (RENDER_GL_FLIP_Y_SWAPCHAIN == 0)
422 // The flag is for informative purposes only.
423 if ((renderConfig_.renderBackend == DeviceBackendType::OPENGL) ||
424 (renderConfig_.renderBackend == DeviceBackendType::OPENGLES)) {
425 renderConfig_.ndcOrigin = RenderingConfiguration::NdcOrigin::BOTTOM_LEFT;
426 }
427 #endif
428
429 renderGraph_ = make_unique<RenderGraph>(gpuResourceMgr_);
430 renderBackend_ = device_.CreateRenderBackend(gpuResourceMgr_, parallelQueue_);
431 renderFrameSync_ = device_.CreateRenderFrameSync();
432
433 { // default render node graph for staging
434 RenderNodeGraphDesc rngd;
435 {
436 RenderNodeDesc rnd;
437 rnd.typeName = "CORE_RN_STAGING";
438 rnd.nodeName = "CORE_RN_STAGING_I";
439 rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
440 rngd.nodes.emplace_back(move(rnd));
441 }
442 #if (RENDER_VULKAN_RT_ENABLED == 1)
443 if (device_.GetBackendType() == DeviceBackendType::VULKAN) {
444 RenderNodeDesc rnd;
445 rnd.typeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING";
446 rnd.nodeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING_I";
447 rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
448 rngd.nodes.emplace_back(move(rnd));
449 }
450 #endif
451 defaultStagingRng_ = renderNodeGraphMgr_.Create(
452 IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
453 }
454 }
455
~Renderer()456 Renderer::~Renderer() {}
457
InitNodeGraph(RenderHandle renderNodeGraphHandle)458 void Renderer::InitNodeGraph(RenderHandle renderNodeGraphHandle)
459 {
460 auto renderNodeDataStore = renderNodeGraphMgr_.Get(renderNodeGraphHandle);
461 if (!renderNodeDataStore) {
462 return;
463 }
464
465 RenderNodeGraphNodeStore& nodeStore = *renderNodeDataStore;
466 if (nodeStore.initialized) {
467 return;
468 }
469 nodeStore.initialized = true;
470
471 // create render node graph specific managers if not created yet
472 if (!nodeStore.renderNodeGpuResourceMgr) {
473 nodeStore.renderNodeGpuResourceMgr = make_unique<RenderNodeGpuResourceManager>(gpuResourceMgr_);
474 }
475
476 const bool enableMultiQueue = (device_.GetGpuQueueCount() > 1);
477
478 // serial, initialize render node context data
479 auto renderNodeNameToIndex =
480 InitializeRenderNodeContextData(renderContext_, nodeStore, enableMultiQueue, renderConfig_);
481
482 if (enableMultiQueue) {
483 // patch gpu queue signaling
484 PatchSignaling(nodeStore, renderNodeNameToIndex);
485 }
486
487 // NOTE: needs to be called once before init. every frame called in BeginRenderNodeGraph()
488 nodeStore.renderNodeGraphShareDataMgr->BeginFrame(static_cast<uint32_t>(nodeStore.renderNodeData.size()),
489 { nodeStore.renderNodeGraphShareData.inputs, nodeStore.renderNodeGraphShareData.inputCount },
490 { nodeStore.renderNodeGraphShareData.outputs, nodeStore.renderNodeGraphShareData.outputCount });
491
492 for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
493 auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
494 PLUGIN_ASSERT(renderNodeData.node);
495 IRenderNode& renderNode = *(renderNodeData.node);
496 auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];
497
498 if (nodeContextData.initialized) {
499 continue;
500 }
501 nodeContextData.initialized = true;
502 // NOTE: needs to be called once before init. every frame called in BeginRenderNodeGraph()
503 const RenderNodeContextManager::PerFrameTimings timings { 0, 0, device_.GetFrameCount() };
504 nodeContextData.renderNodeContextManager->BeginFrame(static_cast<uint32_t>(nodeIdx), timings);
505
506 RENDER_CPU_PERF_SCOPE("Renderer", "Renderer_InitNode_Cpu", renderNodeData.fullName);
507
508 PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].inputData);
509 renderNode.InitNode(*(nodeContextData.renderNodeContextManager));
510 }
511 }
512
513 // Helper for Renderer::RenderFrame
RemapBackBufferHandle(const IRenderDataStoreManager & renderData)514 void Renderer::RemapBackBufferHandle(const IRenderDataStoreManager& renderData)
515 {
516 const auto* dataStorePod = static_cast<IRenderDataStorePod*>(renderData.GetRenderDataStore("RenderDataStorePod"));
517 if (dataStorePod) {
518 auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
519 const auto bb = reinterpret_cast<const NodeGraphBackBufferConfiguration*>(dataView.data());
520 if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) {
521 PLUGIN_ASSERT(device_.HasSwapchain());
522 const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
523 if (!RenderHandleUtil::IsValid(handle)) {
524 const RenderHandle backBufferHandle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
525 const RenderHandle firstSwapchain = gpuResourceMgr_.GetImageRawHandle("CORE_DEFAULT_SWAPCHAIN_0");
526 gpuResourceMgr_.RemapGpuImageHandle(backBufferHandle, firstSwapchain);
527 }
528 } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE) {
529 const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
530 if (RenderHandleUtil::IsValid(handle) && (bb->backBufferHandle)) {
531 gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle.GetHandle());
532 }
533 } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE_BUFFER_COPY) {
534 const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
535 if (RenderHandleUtil::IsValid(handle) && (bb->backBufferHandle) && (bb->gpuBufferHandle)) {
536 gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle.GetHandle());
537 }
538 }
539 }
540 }
541
RenderFrameImpl(const array_view<const RenderHandle> renderNodeGraphs)542 void Renderer::RenderFrameImpl(const array_view<const RenderHandle> renderNodeGraphs)
543 {
544 Tick();
545 frameTimes_.begin = GetTimeStampNow();
546 RENDER_CPU_PERF_SCOPE("Renderer", "Frame", "RenderFrame");
547
548 if (device_.GetDeviceStatus() == false) {
549 ProcessTimeStampEnd();
550 PLUGIN_LOG_ONCE_E("invalid_device_status_render_frame", "invalid device for rendering");
551 return;
552 }
553 device_.Activate();
554 device_.FrameStart();
555
556 renderNodeGraphMgr_.HandlePendingAllocations();
557 renderDataStoreMgr_.PreRender();
558
559 ProcessShaderReload(device_, shaderMgr_, renderNodeGraphMgr_, renderNodeGraphs);
560 // create new shaders if any created this frame (needs to be called before render node init)
561 shaderMgr_.HandlePendingAllocations();
562
563 // update render node graphs with default staging and possible dev gui render node graphs
564 const auto renderNodeGraphInputVector = GatherInputs(renderNodeGraphs);
565
566 const auto renderNodeGraphInputs = array_view(renderNodeGraphInputVector.data(), renderNodeGraphInputVector.size());
567
568 for (const auto& ref : renderNodeGraphInputs) {
569 InitNodeGraph(ref);
570 }
571 device_.Deactivate();
572
573 renderGraph_->BeginFrame();
574 renderFrameSync_->BeginFrame();
575
576 auto graphNodeStores = GetRenderNodeGraphNodeStores(renderNodeGraphInputs, renderNodeGraphMgr_);
577 if (std::any_of(graphNodeStores.begin(), graphNodeStores.end(), IsNull<RenderNodeGraphNodeStore>)) {
578 ProcessTimeStampEnd();
579 PLUGIN_LOG_W("invalid render node graphs for rendering");
580 return;
581 }
582
583 // NOTE: by node graph name find data
584 // NOTE: deprecate this
585 RemapBackBufferHandle(renderDataStoreMgr_);
586
587 // NodeContextPoolManagerGLES::BeginFrame may delete FBOs and device must be active.
588 device_.Activate();
589
590 // begin frame (advance ring buffers etc.)
591 const RenderNodeContextManager::PerFrameTimings timings { previousFrameTime_ - firstTime_, deltaTime_,
592 device_.GetFrameCount() };
593 BeginRenderNodeGraph(graphNodeStores, timings);
594
595 // synchronize, needed for persistantly mapped gpu buffer writing
596 if (!WaitForFence(device_, *renderFrameSync_)) {
597 device_.Deactivate();
598 return; // possible lost device with frame fence
599 }
600
601 // gpu resource allocation and deallocation
602 gpuResourceMgr_.HandlePendingAllocations();
603
604 device_.Deactivate();
605
606 const auto nodeStoresView = array_view<RenderNodeGraphNodeStore*>(graphNodeStores);
607
608 ExecuteRenderNodes(renderNodeGraphInputs, nodeStoresView);
609
610 // render graph process for all render nodes of all render graphs
611 ProcessRenderNodeGraph(device_, *renderGraph_, nodeStoresView);
612
613 device_.SetLockResourceBackendAccess(true);
614 renderDataStoreMgr_.PreRenderBackend();
615
616 ExecuteRenderBackend(renderNodeGraphInputs, nodeStoresView);
617
618 device_.SetLockResourceBackendAccess(false);
619 renderDataStoreMgr_.PostRender();
620
621 device_.FrameEnd();
622 ProcessTimeStampEnd();
623 }
624
RenderFrame(const array_view<const RenderHandleReference> renderNodeGraphs)625 void Renderer::RenderFrame(const array_view<const RenderHandleReference> renderNodeGraphs)
626 {
627 const auto lock = std::lock_guard(renderMutex_);
628
629 // add only unique and valid handles to list for rendering
630 vector<RenderHandle> rngs;
631 rngs.reserve(renderNodeGraphs.size());
632 for (size_t iIdx = 0; iIdx < renderNodeGraphs.size(); ++iIdx) {
633 const RenderHandle& handle = renderNodeGraphs[iIdx].GetHandle();
634 bool duplicate = false;
635 for (auto& ref : rngs) {
636 if (ref == handle) {
637 duplicate = true;
638 }
639 }
640 if ((RenderHandleUtil::GetHandleType(handle) == RenderHandleType::RENDER_NODE_GRAPH) && (!duplicate)) {
641 rngs.emplace_back(handle);
642 }
643 #if (RENDER_VALIDATION_ENABLED == 1)
644 if (duplicate) {
645 PLUGIN_LOG_ONCE_E("renderer_rf_duplicate_rng",
646 "RENDER_VALIDATION: duplicate render node graphs are not supported (idx: %u, id: %" PRIx64,
647 static_cast<uint32_t>(iIdx), handle.id);
648 }
649 #endif
650 }
651 RenderFrameImpl(rngs);
652 }
653
RenderDeferred(const array_view<const RenderHandleReference> renderNodeGraphs)654 void Renderer::RenderDeferred(const array_view<const RenderHandleReference> renderNodeGraphs)
655 {
656 const auto lock = std::lock_guard(deferredMutex_);
657 for (const auto& ref : renderNodeGraphs) {
658 deferredRenderNodeGraphs_.emplace_back(ref);
659 }
660 }
661
RenderDeferredFrame()662 void Renderer::RenderDeferredFrame()
663 {
664 deferredMutex_.lock();
665 decltype(deferredRenderNodeGraphs_) renderNodeGraphs = move(deferredRenderNodeGraphs_);
666 deferredMutex_.unlock();
667 RenderFrame(renderNodeGraphs);
668 }
669
ExecuteRenderNodes(const array_view<const RenderHandle> renderNodeGraphInputs,const array_view<RenderNodeGraphNodeStore * > renderNodeGraphNodeStores)670 void Renderer::ExecuteRenderNodes(const array_view<const RenderHandle> renderNodeGraphInputs,
671 const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
672 {
673 #if (RENDER_PERF_ENABLED == 1)
674 RENDER_CPU_PERF_BEGIN(fullExecuteCpuTimer, "Renderer", "Renderer", "ExecuteAllNodes_Cpu");
675
676 size_t allRenderNodeCount = 0;
677 for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
678 allRenderNodeCount += renderNodeGraphNodeStores[graphIdx]->renderNodeData.size();
679 }
680
681 vector<NodeTimerData> nodeTimers(allRenderNodeCount);
682 #endif
683
684 ITaskQueue* queue = nullptr;
685 if (device_.AllowThreadedProcessing()) {
686 queue = parallelQueue_.get();
687 } else {
688 queue = sequentialQueue_.get();
689 }
690
691 // single threaded gpu resource creation with render nodes
692 CreateGpuResourcesWithRenderNodes(renderNodeGraphNodeStores, renderDataStoreMgr_, shaderMgr_);
693
694 // lock staging data for this frame
695 gpuResourceMgr_.LockFrameStagingData();
696 // final gpu resource allocation and dealloation before low level engine resource handle lock-up
697 // we do not allocate or deallocate resources after RenderNode::ExecuteFrame()
698 device_.Activate();
699 gpuResourceMgr_.HandlePendingAllocations();
700 device_.Deactivate();
701
702 // process render node graph render node share preparations
703 for (auto& ref : renderNodeGraphNodeStores) {
704 ref->renderNodeGraphShareDataMgr->PrepareExecuteFrame();
705 }
706
707 RenderNodeExecutionParameters params = {
708 renderNodeGraphNodeStores,
709 #if (RENDER_PERF_ENABLED == 1)
710 nodeTimers,
711 #endif
712 queue,
713 renderDataStoreMgr_,
714 shaderMgr_,
715 renderConfig_
716 };
717
718 // multi-threaded render node execution
719 RenderNodeExecution(params);
720
721 // Remove tasks.
722 queue->Clear();
723
724 #if (RENDER_PERF_ENABLED == 1)
725 RENDER_CPU_PERF_END(fullExecuteCpuTimer);
726
727 if (auto* inst = GetInstance<IPerformanceDataManagerFactory>(UID_PERFORMANCE_FACTORY); inst) {
728 if (IPerformanceDataManager* perfData = inst->Get("RenderNode"); perfData) {
729 for (size_t nodeIdx = 0; nodeIdx < nodeTimers.size(); ++nodeIdx) {
730 const auto& timerRef = nodeTimers[nodeIdx];
731 perfData->UpdateData(timerRef.debugName, "RenderNodeExecute_Cpu", timerRef.timer.GetMicroseconds());
732 }
733 }
734 }
735 #endif
736 }
737
ExecuteRenderBackend(const array_view<const RenderHandle> renderNodeGraphInputs,const array_view<RenderNodeGraphNodeStore * > renderNodeGraphNodeStores)738 void Renderer::ExecuteRenderBackend(const array_view<const RenderHandle> renderNodeGraphInputs,
739 const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
740 {
741 size_t allRenderNodeCount = 0;
742 for (const auto nodeStore : renderNodeGraphNodeStores) {
743 PLUGIN_ASSERT(nodeStore);
744 allRenderNodeCount += nodeStore->renderNodeData.size();
745 }
746
747 RenderCommandFrameData rcfd;
748 PLUGIN_ASSERT(renderFrameSync_);
749 rcfd.renderFrameSync = renderFrameSync_.get();
750 rcfd.renderCommandContexts.reserve(allRenderNodeCount);
751
752 const bool multiQueueEnabled = (device_.GetGpuQueueCount() > 1u);
753
754 IterateRenderBackendNodeGraphNodeStores(renderNodeGraphNodeStores, rcfd, multiQueueEnabled);
755
756 // NOTE: by node graph name
757 // NOTE: deprecate this
758 const RenderGraph::BackbufferState bbState = renderGraph_->GetBackbufferResourceState();
759 RenderBackendBackBufferConfiguration config { bbState.state, bbState.layout, {} };
760 {
761 auto const dataStorePod =
762 static_cast<IRenderDataStorePod const*>(renderDataStoreMgr_.GetRenderDataStore("RenderDataStorePod"));
763 if (dataStorePod) {
764 auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
765 const NodeGraphBackBufferConfiguration* bb = (const NodeGraphBackBufferConfiguration*)dataView.data();
766 config.config = *bb;
767 }
768 }
769
770 if (!rcfd.renderCommandContexts.empty()) { // do not execute backend with zero work
771 device_.SetRenderBackendRunning(true);
772 frameTimes_.beginBackend = GetTimeStampNow();
773 device_.Activate();
774 renderBackend_->Render(rcfd, config);
775 frameTimes_.beginBackendPresent = GetTimeStampNow();
776 renderBackend_->Present(config);
777 device_.Deactivate();
778 frameTimes_.endBackend = GetTimeStampNow();
779 device_.SetRenderBackendRunning(false);
780 }
781
782 device_.Activate();
783 gpuResourceMgr_.EndFrame();
784 device_.Deactivate();
785 }
786
GatherInputs(const array_view<const RenderHandle> renderNodeGraphInputList)787 vector<RenderHandle> Renderer::GatherInputs(const array_view<const RenderHandle> renderNodeGraphInputList)
788 {
789 vector<RenderHandle> renderNodeGraphInputsVector;
790 size_t defaultRenderNodeGraphCount = 1;
791 #if (RENDER_DEV_ENABLED == 1)
792 defaultRenderNodeGraphCount += 1;
793 #endif
794 renderNodeGraphInputsVector.reserve(renderNodeGraphInputList.size() + defaultRenderNodeGraphCount);
795 renderNodeGraphInputsVector.emplace_back(defaultStagingRng_.GetHandle());
796 renderNodeGraphInputsVector.insert(renderNodeGraphInputsVector.end(), renderNodeGraphInputList.begin().ptr(),
797 renderNodeGraphInputList.end().ptr());
798 if (const auto* dataStorePod =
799 static_cast<IRenderDataStorePod*>(renderDataStoreMgr_.GetRenderDataStore("RenderDataStorePod"));
800 dataStorePod) {
801 auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
802 const auto bb = reinterpret_cast<const NodeGraphBackBufferConfiguration*>(dataView.data());
803 if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE_BUFFER_COPY) {
804 if (!defaultBackBufferGpuBufferRng_) {
805 defaultBackBufferGpuBufferRng_ = CreateBackBufferGpuBufferRenderNodeGraph(renderNodeGraphMgr_);
806 // we have passed render node graph pending allocations, re-allocate
807 renderNodeGraphMgr_.HandlePendingAllocations();
808 }
809 renderNodeGraphInputsVector.emplace_back(defaultBackBufferGpuBufferRng_.GetHandle());
810 }
811 }
812 return renderNodeGraphInputsVector;
813 }
814
ProcessTimeStampEnd()815 void Renderer::ProcessTimeStampEnd()
816 {
817 frameTimes_.end = GetTimeStampNow();
818
819 int64_t finalTime = frameTimes_.begin;
820 finalTime = Math::max(finalTime, frameTimes_.beginBackend);
821 frameTimes_.beginBackend = finalTime;
822
823 finalTime = Math::max(finalTime, frameTimes_.beginBackendPresent);
824 frameTimes_.beginBackendPresent = finalTime;
825
826 finalTime = Math::max(finalTime, frameTimes_.endBackend);
827 frameTimes_.endBackend = finalTime;
828
829 finalTime = Math::max(finalTime, frameTimes_.end);
830 frameTimes_.end = finalTime;
831
832 PLUGIN_ASSERT(frameTimes_.end >= frameTimes_.endBackend);
833 PLUGIN_ASSERT(frameTimes_.endBackend >= frameTimes_.beginBackendPresent);
834 PLUGIN_ASSERT(frameTimes_.beginBackendPresent >= frameTimes_.beginBackend);
835 PLUGIN_ASSERT(frameTimes_.beginBackend >= frameTimes_.begin);
836
837 renderUtil_.SetRenderTimings(frameTimes_);
838 frameTimes_ = {};
839 }
840
Tick()841 void Renderer::Tick()
842 {
843 using namespace std::chrono;
844 const auto currentTime =
845 static_cast<uint64_t>(duration_cast<microseconds>(high_resolution_clock::now().time_since_epoch()).count());
846
847 if (firstTime_ == ~0u) {
848 previousFrameTime_ = firstTime_ = currentTime;
849 }
850 deltaTime_ = currentTime - previousFrameTime_;
851 constexpr auto limitHz = duration_cast<microseconds>(duration<float, std::ratio<1, 15u>>(1)).count();
852 if (deltaTime_ > limitHz) {
853 deltaTime_ = limitHz; // clamp the time step to no longer than 15hz.
854 }
855 previousFrameTime_ = currentTime;
856 }
857 RENDER_END_NAMESPACE()
858