/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "renderer.h"

#include <algorithm>
#include <chrono>

#include <base/containers/string.h>
#include <base/containers/string_view.h>
#include <base/containers/unordered_map.h>
#include <base/containers/vector.h>
#include <base/math/mathf.h>
#if (RENDER_PERF_ENABLED == 1)
#include <core/perf/intf_performance_data_manager.h>
#endif
#include <render/datastore/intf_render_data_store_default_staging.h>
#include <render/datastore/intf_render_data_store_manager.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/intf_render_context.h>
#include <render/intf_renderer.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_node.h>
#include <render/render_data_structures.h>

#include "perf/cpu_perf_scope.h"

#if (RENDER_VALIDATION_ENABLED == 1)
#include <cinttypes>
#endif

#include "datastore/render_data_store_manager.h"
#include "datastore/render_data_store_pod.h"
#include "default_engine_constants.h"
#include "device/device.h"
#include "device/gpu_resource_cache.h"
#include "device/gpu_resource_manager.h"
#include "device/gpu_resource_util.h"
#include "device/render_frame_sync.h"
#include "device/shader_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_node_context_manager.h"
#include "nodecontext/render_node_graph_manager.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "perf/cpu_timer.h"
#include "render_backend.h"
#include "render_context.h"
#include "render_graph.h"
#include "util/log.h"
#include "util/render_util.h"

using namespace BASE_NS;
using namespace CORE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
CORE_PROFILER_SYMBOL(FRAME_MARKER, "Render");

const string_view RENDER_DATA_STORE_DEFAULT_STAGING { "RenderDataStoreDefaultStaging" };

// Helper class for running a lambda as a ThreadPool task.
template<typename Fn>
class FunctionTask final : public IThreadPool::ITask {
public:
    explicit FunctionTask(Fn&& func) : func_(BASE_NS::move(func)) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    Fn func_;
};

template<typename Fn>
inline IThreadPool::ITask::Ptr CreateFunctionTask(Fn&& func)
{
    return IThreadPool::ITask::Ptr { new FunctionTask<Fn>(BASE_NS::move(func)) };
}
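
// Usage sketch for CreateFunctionTask (illustrative only; 'queue' stands for
// an ITaskQueue* and the task id is arbitrary), mirroring how the tasks are
// submitted in RenderNodeExecution() below:
//
//   queue->Submit(0, CreateFunctionTask([]() {
//       PLUGIN_LOG_D("running on the thread pool");
//   }));
//   queue->Execute(); // runs the submitted tasks and waits for completion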

#if (RENDER_PERF_ENABLED == 1)
struct NodeTimerData {
    CpuTimer timer;
    string_view debugName;
};
#endif

struct RenderNodeExecutionParameters {
    const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores;
#if (RENDER_PERF_ENABLED == 1)
    vector<NodeTimerData>& nodeTimers;
#endif
    ITaskQueue* queue;
    IRenderDataStoreManager& renderData;
    ShaderManager& shaderManager;
};

inline uint32_t GetThreadPoolThreadCount(
    const uint32_t numberOfHwCores, const RenderCreateInfo::ThreadPoolCreateInfo& tpci)
{
    auto threads = static_cast<uint32_t>(static_cast<float>(numberOfHwCores) * tpci.threadCountCoefficient);
    threads = Math::min(threads, tpci.maxCount);
    threads = Math::max(threads, tpci.minCount);
    threads = Math::max(1U, threads); // 1 is minimum
    PLUGIN_LOG_D("Renderer thread pool thread count: %u", threads);
    return threads;
}
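
// Worked example with illustrative values: numberOfHwCores = 8,
// threadCountCoefficient = 0.5f, minCount = 2, maxCount = 16:
//   threads = 8 * 0.5 = 4 -> min(4, 16) = 4 -> max(4, 2) = 4 -> max(1, 4) = 4
// The final max(1U, threads) guarantees at least one thread even when the
// coefficient or the core count rounds the result down to zero.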

// Helper for Renderer::InitNodeGraph
unordered_map<string, uint32_t> InitializeRenderNodeContextData(IRenderContext& renderContext,
    RenderNodeGraphNodeStore& nodeStore, const bool enableMultiQueue, const RenderingConfiguration& renderConfig)
{
    unordered_map<string, uint32_t> renderNodeNameToIndex(nodeStore.renderNodeData.size());
    vector<ContextInitDescription> contextInitDescs(nodeStore.renderNodeData.size());
    for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
        const auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
        PLUGIN_ASSERT(renderNodeData.inputData);
        PLUGIN_ASSERT(renderNodeData.node);
        auto& inputData = *(renderNodeData.inputData);
        auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];

        renderNodeNameToIndex[renderNodeData.fullName] = (uint32_t)nodeIdx;

        // always reset; dependencies are redone with new nodes
        nodeContextData.submitInfo.signalSemaphore = false;
        nodeContextData.submitInfo.waitSemaphoreCount = 0;
        nodeContextData.submitInfo.waitForSwapchainAcquireSignal = false;

        // with dynamic render node graphs, individual nodes can be initialized separately;
        // 'initialized' is set to true when renderNode->InitNode() is run
        if (nodeContextData.initialized) {
            continue;
        }

        auto& contextInitRef = contextInitDescs[nodeIdx];
        contextInitRef.requestedQueue = inputData.queue;

        auto& device = (Device&)renderContext.GetDevice();
        contextInitRef.requestedQueue = device.GetValidGpuQueue(contextInitRef.requestedQueue);

        auto& shaderMgr = (ShaderManager&)renderContext.GetDevice().GetShaderManager();
        auto& gpuResourceMgr = (GpuResourceManager&)renderContext.GetDevice().GetGpuResourceManager();
        // ordering is important
        nodeContextData.nodeContextPsoMgr = make_unique<NodeContextPsoManager>(device, shaderMgr);
        nodeContextData.nodeContextDescriptorSetMgr = device.CreateNodeContextDescriptorSetManager();
        nodeContextData.renderCommandList =
            make_unique<RenderCommandList>(renderNodeData.fullName, *nodeContextData.nodeContextDescriptorSetMgr,
                gpuResourceMgr, *nodeContextData.nodeContextPsoMgr, contextInitRef.requestedQueue, enableMultiQueue);
        nodeContextData.nodeContextPoolMgr =
            device.CreateNodeContextPoolManager(gpuResourceMgr, contextInitRef.requestedQueue);
        RenderNodeGraphData rngd = { nodeStore.renderNodeGraphName, nodeStore.renderNodeGraphDataStoreName,
            renderConfig };
        RenderNodeContextManager::CreateInfo rncmci { renderContext, rngd, *renderNodeData.inputData,
            renderNodeData.nodeName, renderNodeData.nodeJson, *nodeContextData.nodeContextDescriptorSetMgr,
            *nodeContextData.nodeContextPsoMgr, *nodeContextData.renderCommandList,
            *nodeStore.renderNodeGraphShareDataMgr };
        nodeContextData.renderNodeContextManager = make_unique<RenderNodeContextManager>(rncmci);
#if ((RENDER_VALIDATION_ENABLED == 1) || (RENDER_VULKAN_VALIDATION_ENABLED == 1))
        nodeContextData.nodeContextDescriptorSetMgr->SetValidationDebugName(renderNodeData.fullName);
        nodeContextData.nodeContextPoolMgr->SetValidationDebugName(renderNodeData.fullName);
#endif
        nodeContextData.renderBarrierList = make_unique<RenderBarrierList>(
            (contextInitRef.requestedQueue.type != GpuQueue::QueueType::UNDEFINED) ? 4u : 0u);
    }
    return renderNodeNameToIndex;
}

// Helper for Renderer::InitNodeGraph
void PatchSignaling(RenderNodeGraphNodeStore& nodeStore, const unordered_map<string, uint32_t>& renderNodeNameToIndex)
{
    PLUGIN_ASSERT(renderNodeNameToIndex.size() == nodeStore.renderNodeData.size());
    for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
        PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].inputData);
        const auto& nodeInputDataRef = *(nodeStore.renderNodeData[nodeIdx].inputData);
        auto& submitInfo = nodeStore.renderNodeContextData[nodeIdx].submitInfo;

        for (const auto& nodeNameRef : nodeInputDataRef.gpuQueueWaitForSignals.nodeNames) {
            if (const auto iter = renderNodeNameToIndex.find(nodeNameRef); iter != renderNodeNameToIndex.cend()) {
                if (submitInfo.waitSemaphoreCount < PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS) {
                    const uint32_t index = iter->second;
                    // mark node to signal
                    nodeStore.renderNodeContextData[index].submitInfo.signalSemaphore = true;

                    submitInfo.waitSemaphoreNodeIndices[submitInfo.waitSemaphoreCount] = index;
                    submitInfo.waitSemaphoreCount++;
                } else {
                    PLUGIN_LOG_E("render node can wait only for (%u) render node signals",
                        PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS);
                    PLUGIN_ASSERT(false);
                }
            } else {
                PLUGIN_LOG_E("invalid render node wait signal dependency");
                PLUGIN_ASSERT(false);
            }
        }
    }
}
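
// Signaling sketch: if node "B" lists node "A" in its
// gpuQueueWaitForSignals.nodeNames, PatchSignaling produces (with hypothetical
// indices idxA and idxB):
//
//   renderNodeContextData[idxA].submitInfo.signalSemaphore == true
//   renderNodeContextData[idxB].submitInfo.waitSemaphoreNodeIndices[0] == idxA
//   renderNodeContextData[idxB].submitInfo.waitSemaphoreCount == 1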

// Helper for Renderer::RenderFrame
void BeginRenderNodeGraph(RenderNodeGraphGlobalShareDataManager* rngGlobalShareDataMgr,
    const vector<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
    const RenderNodeContextManager::PerFrameTimings& timings)
{
    RenderNodeGraphShareDataManager* prevRngShareDataMgr = nullptr;
    if (rngGlobalShareDataMgr) {
        rngGlobalShareDataMgr->BeginFrame();
    }
    for (const RenderNodeGraphNodeStore* renderNodeDataStore : renderNodeGraphNodeStores) {
        const auto renderNodeCount = static_cast<uint32_t>(renderNodeDataStore->renderNodeContextData.size());
        auto& rngShareData = renderNodeDataStore->renderNodeGraphShareData;
        renderNodeDataStore->renderNodeGraphShareDataMgr->BeginFrame(rngGlobalShareDataMgr, prevRngShareDataMgr,
            renderNodeCount, { rngShareData.inputs, rngShareData.inputCount },
            { rngShareData.outputs, rngShareData.outputCount });
        for (uint32_t idx = 0; idx < renderNodeCount; ++idx) {
            const RenderNodeContextData& contextData = renderNodeDataStore->renderNodeContextData[idx];
            contextData.renderCommandList->BeginFrame();
            contextData.renderBarrierList->BeginFrame();
            contextData.nodeContextPoolMgr->BeginFrame();
            contextData.nodeContextDescriptorSetMgr->BeginFrame();
            contextData.renderNodeContextManager->BeginFrame(idx, timings);
        }
        prevRngShareDataMgr = renderNodeDataStore->renderNodeGraphShareDataMgr.get();
    }
}

// Helper for Renderer::RenderFrame
inline void FillRngNodeStores(array_view<const RenderHandle> inputs, RenderNodeGraphManager& renderNodeGraphMgr,
    vector<RenderNodeGraphNodeStore*>& rngNodeStores)
{
    rngNodeStores.reserve(inputs.size());
    for (auto const& input : inputs) {
        rngNodeStores.push_back(renderNodeGraphMgr.Get(input));
    }
}

// Helper for Renderer::RenderFrame
inline bool WaitForFence(const Device& device, RenderFrameSync& renderFrameSync)
{
    RENDER_CPU_PERF_SCOPE("RenderFrame", "WaitForFrameFence");
    renderFrameSync.WaitForFrameFence();

    return device.GetDeviceStatus();
}

// Helper for Renderer::RenderFrame
inline void ProcessRenderNodeGraph(
    Device& device, RenderGraph& renderGraph, array_view<RenderNodeGraphNodeStore*> graphNodeStoreView)
{
    RENDER_CPU_PERF_SCOPE("RenderFrame", "RenderGraph");
    renderGraph.ProcessRenderNodeGraph(device.HasSwapchain(), graphNodeStoreView);
}

// Helper for Renderer::ExecuteRenderNodes
void RenderNodePreExecution(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores)
{
    for (const RenderNodeGraphNodeStore* nodeStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(nodeStore);
        for (const auto& nodeIdx : nodeStore->renderNodeData) {
            IRenderNode& renderNode = *(nodeIdx.node);
            renderNode.PreExecuteFrame();
        }
    }
}

// Helper for Renderer::ExecuteRenderNodes
void RenderNodeExecution(RenderNodeExecutionParameters& params)
{
#if (RENDER_PERF_ENABLED == 1)
    size_t allNodeIdx = 0;
#endif
    uint64_t taskId = 0;
    for (const auto* nodeStorePtr : params.renderNodeGraphNodeStores) {
        // there shouldn't be nullptrs but let's play it safe
        PLUGIN_ASSERT(nodeStorePtr);
        if (nodeStorePtr) {
            const auto& nodeStore = *nodeStorePtr;
            for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
                PLUGIN_ASSERT(nodeStore.renderNodeData[nodeIdx].node);
                if (nodeStore.renderNodeData[nodeIdx].node) {
                    IRenderNode& renderNode = *(nodeStore.renderNodeData[nodeIdx].node);
                    RenderNodeContextData const& renderNodeContextData = nodeStore.renderNodeContextData[nodeIdx];
                    PLUGIN_ASSERT(renderNodeContextData.renderCommandList);
                    RenderCommandList& renderCommandList = *renderNodeContextData.renderCommandList;

                    // do not run the render node if the flag is set
                    if ((renderNode.GetExecuteFlags() &
                            IRenderNode::ExecuteFlagBits::EXECUTE_FLAG_BITS_DO_NOT_EXECUTE) == 0) {
#if (RENDER_PERF_ENABLED == 1)
                        auto& timerRef = params.nodeTimers[allNodeIdx++];
                        timerRef.debugName = nodeStore.renderNodeData[nodeIdx].fullName;
                        params.queue->Submit(
                            taskId++, CreateFunctionTask([&timerRef, &renderNode, &renderCommandList]() {
                                RENDER_CPU_PERF_SCOPE("ExecuteRenderNodes", timerRef.debugName);
                                timerRef.timer.Begin();

                                renderCommandList.BeforeRenderNodeExecuteFrame();
                                renderNode.ExecuteFrame(renderCommandList);
                                renderCommandList.AfterRenderNodeExecuteFrame();

                                timerRef.timer.End();
                            }));
#else
                        params.queue->Submit(taskId++, CreateFunctionTask([&renderCommandList, &renderNode]() {
                            renderCommandList.BeforeRenderNodeExecuteFrame();
                            renderNode.ExecuteFrame(renderCommandList);
                            renderCommandList.AfterRenderNodeExecuteFrame();
                        }));
#endif
                    }
                }
            }
        }
    }

    // Execute and wait for completion.
    params.queue->Execute();
}
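
// A render node can opt out of execution for a frame through its execute
// flags; a minimal sketch with a hypothetical node class (exact signature per
// intf_render_node.h):
//
//   IRenderNode::ExecuteFlags MyRenderNode::GetExecuteFlags() const
//   {
//       // skipped by RenderNodeExecution when the bit is set
//       return skipThisFrame_ ? EXECUTE_FLAG_BITS_DO_NOT_EXECUTE : 0U;
//   }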

// Helper for Renderer::ExecuteRenderBackend
void IterateRenderBackendNodeGraphNodeStores(const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores,
    const bool multiQueueEnabled, RenderCommandFrameData& rcfd)
{
    for (const RenderNodeGraphNodeStore* nodeStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(nodeStore);
        if (!nodeStore) {
            continue;
        }

        unordered_map<uint32_t, uint32_t> nodeIdxToRenderCommandContextIdx;
        const auto multiQueuePatchBeginIdx = (uint32_t)rcfd.renderCommandContexts.size();
        uint32_t multiQueuePatchCount = 0;
        if (multiQueueEnabled) {
            nodeIdxToRenderCommandContextIdx.reserve(nodeStore->renderNodeContextData.size());
        }

        for (size_t nodeIdx = 0; nodeIdx < nodeStore->renderNodeContextData.size(); ++nodeIdx) {
            const auto& ref = nodeStore->renderNodeContextData[nodeIdx];
            PLUGIN_ASSERT((ref.renderCommandList != nullptr) && (ref.renderBarrierList != nullptr) &&
                          (ref.nodeContextPsoMgr != nullptr) && (ref.nodeContextPoolMgr != nullptr));
            const bool valid = (ref.renderCommandList->HasValidRenderCommands());
            if (valid) {
                if (multiQueueEnabled) {
                    nodeIdxToRenderCommandContextIdx[(uint32_t)nodeIdx] = (uint32_t)rcfd.renderCommandContexts.size();
                    multiQueuePatchCount++;
                }
                // get final backend node index of the first render node which uses the swapchain image
                const auto backendNodeIdx = static_cast<uint32_t>(rcfd.renderCommandContexts.size());
                if ((rcfd.firstSwapchainNodeIdx > backendNodeIdx) && (ref.submitInfo.waitForSwapchainAcquireSignal)) {
                    rcfd.firstSwapchainNodeIdx = static_cast<uint32_t>(rcfd.renderCommandContexts.size());
                }
                rcfd.renderCommandContexts.push_back({ ref.renderBackendNode, ref.renderCommandList.get(),
                    ref.renderBarrierList.get(), ref.nodeContextPsoMgr.get(), ref.nodeContextDescriptorSetMgr.get(),
                    ref.nodeContextPoolMgr.get(), (uint32_t)nodeIdx, ref.submitInfo,
                    nodeStore->renderNodeData[nodeIdx].fullName });
            }
        }

        if (multiQueueEnabled) { // patch correct render command context indices
            // iterate only over the contexts added for this node store
            for (uint32_t idx = multiQueuePatchBeginIdx; idx < (multiQueuePatchBeginIdx + multiQueuePatchCount);
                 ++idx) {
                auto& ref = rcfd.renderCommandContexts[idx];
                const auto& nodeContextRef = nodeStore->renderNodeContextData[ref.renderGraphRenderNodeIndex];

                ref.submitDepencies.signalSemaphore = nodeContextRef.submitInfo.signalSemaphore;
                ref.submitDepencies.waitSemaphoreCount = nodeContextRef.submitInfo.waitSemaphoreCount;
                for (uint32_t waitIdx = 0; waitIdx < ref.submitDepencies.waitSemaphoreCount; ++waitIdx) {
                    const uint32_t currRenderNodeIdx = nodeContextRef.submitInfo.waitSemaphoreNodeIndices[waitIdx];
                    PLUGIN_ASSERT(nodeIdxToRenderCommandContextIdx.count(currRenderNodeIdx) == 1);

                    ref.submitDepencies.waitSemaphoreNodeIndices[waitIdx] =
                        nodeIdxToRenderCommandContextIdx[currRenderNodeIdx];
                }
            }
        }
    }
}

template<typename T>
inline bool IsNull(T* ptr)
{
    return ptr == nullptr;
}

inline int64_t GetTimeStampNow()
{
    using namespace std::chrono;
    using Clock = system_clock;
    return Clock::now().time_since_epoch().count();
}

void CreateDefaultRenderNodeGraphs(const Device& device, RenderNodeGraphManager& rngMgr,
    RenderHandleReference& defaultStaging, RenderHandleReference& defaultEndFrameStaging)
{
    {
        RenderNodeGraphDesc rngd;
        {
            RenderNodeDesc rnd;
            rnd.typeName = "CORE_RN_STAGING";
            rnd.nodeName = "CORE_RN_STAGING_I";
            rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
            rngd.nodes.push_back(move(rnd));
        }
#if (RENDER_VULKAN_RT_ENABLED == 1)
        if (device.GetBackendType() == DeviceBackendType::VULKAN) {
            RenderNodeDesc rnd;
            rnd.typeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING";
            rnd.nodeName = "CORE_RN_DEFAULT_ACCELERATION_STRUCTURE_STAGING_I";
            rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
            rngd.nodes.push_back(move(rnd));
        }
#endif
        defaultStaging =
            rngMgr.Create(IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
    }
    {
        RenderNodeGraphDesc rngd;
        {
            RenderNodeDesc rnd;
            rnd.typeName = "CORE_RN_END_FRAME_STAGING";
            rnd.nodeName = "CORE_RN_END_FRAME_STAGING_I";
            rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
            rngd.nodes.push_back(move(rnd));
        }
        defaultEndFrameStaging =
            rngMgr.Create(IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);
    }
}
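
// A custom render node graph can be described the same way; a sketch with
// hypothetical type and node names:
//
//   RenderNodeGraphDesc rngd;
//   RenderNodeDesc rnd;
//   rnd.typeName = "MyRenderNodeType";
//   rnd.nodeName = "MyRenderNodeType_I";
//   rnd.description.queue = { GpuQueue::QueueType::GRAPHICS, 0u };
//   rngd.nodes.push_back(move(rnd));
//   RenderHandleReference rng = rngMgr.Create(
//       IRenderNodeGraphManager::RenderNodeGraphUsageType::RENDER_NODE_GRAPH_STATIC, rngd);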
} // namespace

Renderer::Renderer(IRenderContext& context)
    : renderContext_(context), device_(static_cast<Device&>(context.GetDevice())),
      gpuResourceMgr_(static_cast<GpuResourceManager&>(device_.GetGpuResourceManager())),
      shaderMgr_(static_cast<ShaderManager&>(device_.GetShaderManager())),
      renderNodeGraphMgr_(static_cast<RenderNodeGraphManager&>(context.GetRenderNodeGraphManager())),
      renderDataStoreMgr_(static_cast<RenderDataStoreManager&>(context.GetRenderDataStoreManager())),
      renderUtil_(static_cast<RenderUtil&>(context.GetRenderUtil()))
{
    const RenderCreateInfo rci = ((const RenderContext&)renderContext_).GetCreateInfo();
    if (rci.createFlags & RenderCreateInfo::CreateInfoFlagBits::CREATE_INFO_SEPARATE_RENDER_FRAME_BACKEND_BIT) {
        separatedRendering_.separateBackend = true;
    }
    if (rci.createFlags & RenderCreateInfo::CreateInfoFlagBits::CREATE_INFO_SEPARATE_RENDER_FRAME_PRESENT_BIT) {
        separatedRendering_.separatePresent = true;
    }

    const auto factory = GetInstance<ITaskQueueFactory>(UID_TASK_QUEUE_FACTORY);
    if (factory) {
        const uint32_t threadCount = GetThreadPoolThreadCount(factory->GetNumberOfCores(), rci.threadPoolCreateInfo);
        forceSequentialQueue_ = (threadCount <= 1U);
        threadPool_ = factory->CreateThreadPool(threadCount);
        parallelQueue_ = factory->CreateParallelTaskQueue(threadPool_);
        sequentialQueue_ = factory->CreateSequentialTaskQueue(threadPool_);
    }

    renderConfig_ = { device_.GetBackendType(), RenderingConfiguration::NdcOrigin::TOP_LEFT };
#if ((RENDER_HAS_GL_BACKEND) || (RENDER_HAS_GLES_BACKEND)) && (RENDER_GL_FLIP_Y_SWAPCHAIN == 0)
    // The flag is for informative purposes only.
    if ((renderConfig_.renderBackend == DeviceBackendType::OPENGL) ||
        (renderConfig_.renderBackend == DeviceBackendType::OPENGLES)) {
        renderConfig_.ndcOrigin = RenderingConfiguration::NdcOrigin::BOTTOM_LEFT;
    }
#endif

    renderGraph_ = make_unique<RenderGraph>(device_);
    renderBackend_ = device_.CreateRenderBackend(gpuResourceMgr_, forceSequentialQueue_
        ? static_cast<ITaskQueue*>(sequentialQueue_.get())
        : static_cast<ITaskQueue*>(parallelQueue_.get()));
    renderFrameSync_ = device_.CreateRenderFrameSync();
    rngGlobalShareDataMgr_ = make_unique<RenderNodeGraphGlobalShareDataManager>();

    CreateDefaultRenderNodeGraphs(device_, renderNodeGraphMgr_, defaultStagingRng_, defaultEndFrameStagingRng_);

    dsStaging_ = static_cast<IRenderDataStoreDefaultStaging*>(
        renderDataStoreMgr_.GetRenderDataStore(RENDER_DATA_STORE_DEFAULT_STAGING).get());
}

Renderer::~Renderer() = default;

void Renderer::InitNodeGraphs(const array_view<const RenderHandle> renderNodeGraphs)
{
    const RenderNodeGraphShareDataManager* prevRngShareDataMgr = nullptr;
    for (const auto& rng : renderNodeGraphs) {
        auto renderNodeDataStore = renderNodeGraphMgr_.Get(rng);
        if (!renderNodeDataStore) {
            continue;
        }

        RenderNodeGraphNodeStore& nodeStore = *renderNodeDataStore;
        if (nodeStore.initialized) {
            continue;
        }
        nodeStore.initialized = true;

        const bool enableMultiQueue = (device_.GetGpuQueueCount() > 1);

        // serial, initialize render node context data
        auto renderNodeNameToIndex =
            InitializeRenderNodeContextData(renderContext_, nodeStore, enableMultiQueue, renderConfig_);

        if (enableMultiQueue) {
            // patch gpu queue signaling
            PatchSignaling(nodeStore, renderNodeNameToIndex);
        }

        // NOTE: needs to be called once before init; after that it is called every frame in BeginRenderNodeGraph()
        nodeStore.renderNodeGraphShareDataMgr->BeginFrame(rngGlobalShareDataMgr_.get(), prevRngShareDataMgr,
            static_cast<uint32_t>(nodeStore.renderNodeData.size()),
            { nodeStore.renderNodeGraphShareData.inputs, nodeStore.renderNodeGraphShareData.inputCount },
            { nodeStore.renderNodeGraphShareData.outputs, nodeStore.renderNodeGraphShareData.outputCount });
        prevRngShareDataMgr = nodeStore.renderNodeGraphShareDataMgr.get();

        const RenderNodeContextManager::PerFrameTimings timings { 0, 0, device_.GetFrameCount() };
        for (size_t nodeIdx = 0; nodeIdx < nodeStore.renderNodeData.size(); ++nodeIdx) {
            auto& nodeContextData = nodeStore.renderNodeContextData[nodeIdx];
            if (nodeContextData.initialized) {
                continue;
            }
            nodeContextData.initialized = true;

            // NOTE: needs to be called once before init; after that it is called every frame in BeginRenderNodeGraph()
            nodeContextData.renderNodeContextManager->BeginFrame(static_cast<uint32_t>(nodeIdx), timings);

            auto& renderNodeData = nodeStore.renderNodeData[nodeIdx];
            PLUGIN_ASSERT(renderNodeData.inputData);
            PLUGIN_ASSERT(renderNodeData.node);

            RENDER_CPU_PERF_SCOPE("InitRenderNodes", renderNodeData.fullName);
            renderNodeData.node->InitNode(*(nodeContextData.renderNodeContextManager));
        }
    }
}

// Helper for Renderer::RenderFrame
void Renderer::RemapBackBufferHandle(const IRenderDataStoreManager& renderData)
{
    const refcnt_ptr<IRenderDataStorePod> dataStorePod = renderData.GetRenderDataStore(RenderDataStorePod::TYPE_NAME);
    if (dataStorePod) {
        auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
        const auto bb = reinterpret_cast<const NodeGraphBackBufferConfiguration*>(dataView.data());
        if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::SWAPCHAIN) {
            if (!device_.HasSwapchain()) {
                PLUGIN_LOG_E("Using swapchain rendering without swapchain");
            }
        } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE) {
            const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
            if (RenderHandleUtil::IsValid(handle) && RenderHandleUtil::IsValid(bb->backBufferHandle)) {
                gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle);
            }
        } else if (bb->backBufferType == NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE_BUFFER_COPY) {
            const RenderHandle handle = gpuResourceMgr_.GetImageRawHandle(bb->backBufferName);
            if (RenderHandleUtil::IsValid(handle) && RenderHandleUtil::IsValid(bb->backBufferHandle) &&
                RenderHandleUtil::IsValid(bb->gpuBufferHandle)) {
                gpuResourceMgr_.RemapGpuImageHandle(handle, bb->backBufferHandle);
            }
            // handle image to buffer copy via post frame staging
            {
                RenderHandle backbufferHandle = bb->backBufferHandle;
                if (bb->backBufferName == DefaultEngineGpuResourceConstants::CORE_DEFAULT_BACKBUFFER) {
                    // we need to use the core default backbuffer handle and not the replaced handle in this situation
                    backbufferHandle =
                        gpuResourceMgr_.GetImageHandle(DefaultEngineGpuResourceConstants::CORE_DEFAULT_BACKBUFFER)
                            .GetHandle();
                }
                const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(backbufferHandle);
                const BufferImageCopy bic {
                    0, // bufferOffset
                    0, // bufferRowLength
                    0, // bufferImageHeight
                    ImageSubresourceLayers { CORE_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1u }, // imageSubresource
                    Size3D { 0, 0, 0 }, // imageOffset
                    Size3D { desc.width, desc.height, 1u }, // imageExtent
                };
                dsStaging_->CopyImageToBuffer(gpuResourceMgr_.Get(backbufferHandle),
                    gpuResourceMgr_.Get(bb->gpuBufferHandle), bic,
                    IRenderDataStoreDefaultStaging::ResourceCopyInfo::END_FRAME);
            }
        }
    }
}
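
// The POD read above is written by the application side through the POD data
// store; a hedged sketch (field names as used above, exact setter API per
// the POD data store interface):
//
//   NodeGraphBackBufferConfiguration bb {};
//   bb.backBufferType = NodeGraphBackBufferConfiguration::BackBufferType::GPU_IMAGE;
//   // ... fill bb.backBufferName and bb.backBufferHandle ...
//   dataStorePod->Set("NodeGraphBackBufferConfiguration",
//       array_view(reinterpret_cast<const uint8_t*>(&bb), sizeof(bb)));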

void Renderer::RenderFrameImpl(const array_view<const RenderHandle> renderNodeGraphs)
{
    if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
        separatedRendering_.frontMtx.lock();
    }

    RENDER_CPU_PERF_BEGIN(renderFront, "RenderFrame", "Frontend");

    Tick();
    frameTimes_.begin = GetTimeStampNow();

    if (!device_.GetDeviceStatus()) {
        ProcessTimeStampEnd();
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E("invalid_device_status_render_frame", "invalid device for rendering");
#endif
        return;
    }
    CORE_PROFILER_MARK_FRAME_START(FRAME_MARKER);

    renderDataStoreMgr_.CommitFrameData();

    device_.Activate();
    device_.FrameStart();
    renderFrameTimeData_.frameIndex = device_.GetFrameCount();

    (static_cast<GpuResourceCache&>(gpuResourceMgr_.GetGpuResourceCache())).BeginFrame(device_.GetFrameCount());

    // handle utils (needs to be called before render data store pre-renders)
    renderUtil_.BeginFrame();
    // global descriptor set manager
    device_.GetDescriptorSetManager().BeginFrame();

    // remap the default back buffer (needs to be called before render data store pre-renders)
    RemapBackBufferHandle(renderDataStoreMgr_);

    renderNodeGraphMgr_.HandlePendingAllocations();
    renderDataStoreMgr_.PreRender();

    // create new shaders if any were created this frame (needs to be called before render node init)
    shaderMgr_.HandlePendingAllocations();

    auto& rngInputs = renderFrameTimeData_.rngInputs;
    auto& rngNodeStores = renderFrameTimeData_.rngNodeStores;
    PLUGIN_ASSERT(rngInputs.empty());
    PLUGIN_ASSERT(rngNodeStores.empty());

    gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::UNDEFINED);
    // update render node graphs with default staging
    FillRngInputs(renderNodeGraphs, rngInputs);
    const auto renderNodeGraphInputs = array_view(rngInputs.data(), rngInputs.size());

    InitNodeGraphs(renderNodeGraphInputs);
    device_.Deactivate();

    renderGraph_->BeginFrame();

    FillRngNodeStores(renderNodeGraphInputs, renderNodeGraphMgr_, rngNodeStores);
    if (std::any_of(rngNodeStores.begin(), rngNodeStores.end(), IsNull<RenderNodeGraphNodeStore>)) {
        ProcessTimeStampEnd();
        PLUGIN_LOG_W("invalid render node graphs for rendering");
        return;
    }

    // NodeContextPoolManagerGLES::BeginFrame may delete FBOs and the device must be active.
    device_.Activate();

    renderFrameSync_->BeginFrame();
    // begin frame (advance ring buffers etc.)
    const RenderNodeContextManager::PerFrameTimings timings { previousFrameTime_ - firstTime_, deltaTime_,
        device_.GetFrameCount() };
    BeginRenderNodeGraph(rngGlobalShareDataMgr_.get(), rngNodeStores, timings);

    // synchronize; needed for persistently mapped gpu buffer writing
    if (!WaitForFence(device_, *renderFrameSync_)) {
        device_.Deactivate();
        return; // possibly a lost device detected with the frame fence
    }

    // gpu resource allocation and deallocation
    gpuResourceMgr_.HandlePendingAllocations(true);

    device_.Deactivate();

    const auto nodeStoresView = array_view<RenderNodeGraphNodeStore*>(rngNodeStores);
    ExecuteRenderNodes(nodeStoresView);

    // render graph process for all render nodes of all render graphs
    ProcessRenderNodeGraph(device_, *renderGraph_, nodeStoresView);

    renderDataStoreMgr_.PostRender();

    // set front-end index (before mutexes)
    renderStatus_.frontEndIndex = renderFrameTimeData_.frameIndex;
    if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
        separatedRendering_.frontMtx.unlock();
    }
    RENDER_CPU_PERF_END(renderFront);
    if (!separatedRendering_.separateBackend) {
        RenderFrameBackendImpl();
    }
}

void Renderer::RenderFrameBackendImpl()
{
    if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
        separatedRendering_.frontMtx.lock();
        separatedRendering_.backMtx.lock();
    }

    RENDER_CPU_PERF_BEGIN(renderBack, "RenderFrame", "Backend");

    auto& rngInputs = renderFrameTimeData_.rngInputs;
    auto& rngNodeStores = renderFrameTimeData_.rngNodeStores;

    gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::RENDER_BACKEND);
    device_.SetLockResourceBackendAccess(true);
    renderDataStoreMgr_.PreRenderBackend();

    size_t allRenderNodeCount = 0;
    for (const auto* nodeStore : rngNodeStores) {
        PLUGIN_ASSERT(nodeStore);
        if (nodeStore) {
            allRenderNodeCount += nodeStore->renderNodeData.size();
        }
    }

    RenderCommandFrameData rcfd;
    PLUGIN_ASSERT(renderFrameSync_);
    rcfd.renderFrameSync = renderFrameSync_.get();
    rcfd.renderFrameUtil = &(static_cast<RenderFrameUtil&>(renderContext_.GetRenderUtil().GetRenderFrameUtil()));
    rcfd.renderCommandContexts.reserve(allRenderNodeCount);

    const bool multiQueueEnabled = (device_.GetGpuQueueCount() > 1u);
    IterateRenderBackendNodeGraphNodeStores(rngNodeStores, multiQueueEnabled, rcfd);

    // NOTE: by node graph name
    // NOTE: deprecate this
    const RenderGraph::SwapchainStates bbState = renderGraph_->GetSwapchainResourceStates();
    RenderBackendBackBufferConfiguration config;
    for (const auto& swapState : bbState.swapchains) {
        config.swapchainData.push_back({ swapState.handle, swapState.state, swapState.layout, {} });
    }
    if (!config.swapchainData.empty()) {
        // NOTE: this is backwards compatibility for a single (default) swapchain config data
        // and should be removed
        if (const refcnt_ptr<IRenderDataStorePod> dataStorePod =
                renderDataStoreMgr_.GetRenderDataStore(RenderDataStorePod::TYPE_NAME)) {
            auto const dataView = dataStorePod->Get("NodeGraphBackBufferConfiguration");
            if (dataView.size_bytes() == sizeof(NodeGraphBackBufferConfiguration)) {
                // expected to be the first swapchain in the list
                const auto* bb = (const NodeGraphBackBufferConfiguration*)dataView.data();
                config.swapchainData[0U].config = *bb;
            }
        }
    }
    renderFrameTimeData_.config = config;
    // must run the backend if there are descriptor sets to update, even if there's nothing to render
    renderFrameTimeData_.hasBackendWork = (!rcfd.renderCommandContexts.empty()) ||
                                          (!device_.GetDescriptorSetManager().GetUpdateDescriptorSetHandles().empty());

    device_.Activate();

    if (renderFrameTimeData_.hasBackendWork) { // do not execute the backend with zero work
        device_.SetRenderBackendRunning(true);

        frameTimes_.beginBackend = GetTimeStampNow();
        renderBackend_->Render(rcfd, config);
        frameTimes_.endBackend = GetTimeStampNow();

        device_.SetRenderBackendRunning(false);
    }
    gpuResourceMgr_.EndFrame();

    if (separatedRendering_.separatePresent) {
        device_.Deactivate();
    }

    device_.SetLockResourceBackendAccess(false);

    // clear
    rngInputs.clear();
    rngNodeStores.clear();

    // set back-end index (before mutexes)
    renderStatus_.backEndIndex = renderStatus_.frontEndIndex;
    if (separatedRendering_.separateBackend || separatedRendering_.separatePresent) {
        separatedRendering_.frontMtx.unlock();
        separatedRendering_.backMtx.unlock();
    }
    RENDER_CPU_PERF_END(renderBack);
    if (!separatedRendering_.separatePresent) {
        RenderFramePresentImpl();
    }
}

void Renderer::RenderFramePresentImpl()
{
    if (separatedRendering_.separatePresent) {
        separatedRendering_.backMtx.lock();
    }

    RENDER_CPU_PERF_SCOPE("RenderFrame", "Presentation");

    if (renderFrameTimeData_.hasBackendWork) { // do not present with zero work
        if (separatedRendering_.separatePresent) {
            device_.Activate();
        }

        frameTimes_.beginBackendPresent = GetTimeStampNow();
        renderBackend_->Present(renderFrameTimeData_.config);
        frameTimes_.endBackendPresent = GetTimeStampNow();

        if (separatedRendering_.separatePresent) {
            device_.Deactivate();
        }
    }
    if (!separatedRendering_.separatePresent) {
        device_.Deactivate();
    }

    renderDataStoreMgr_.PostRenderBackend();

    renderFrameTimeData_.config = {};

    // needs to be called after render data store post-render
    renderUtil_.EndFrame();

    // RenderFramePresentImpl() needs to be called every frame even though nothing is presented
    device_.FrameEnd();
    ProcessTimeStampEnd();
    CORE_PROFILER_MARK_FRAME_END(FRAME_MARKER);

    CORE_PROFILER_MARK_GLOBAL_FRAME_CHANGED();
    // set presentation index (before mutexes)
    renderStatus_.presentIndex = renderStatus_.backEndIndex;
    if (separatedRendering_.separatePresent) {
        separatedRendering_.backMtx.unlock();
    }
}

uint64_t Renderer::RenderFrame(const array_view<const RenderHandleReference> renderNodeGraphs)
{
    const auto lock = std::lock_guard(renderMutex_);

    // add only unique and valid handles to the list for rendering
    vector<RenderHandle> rngs;
    rngs.reserve(renderNodeGraphs.size());
    for (size_t iIdx = 0; iIdx < renderNodeGraphs.size(); ++iIdx) {
        const RenderHandle& handle = renderNodeGraphs[iIdx].GetHandle();
        bool duplicate = false;
        for (auto& ref : rngs) {
            if (ref == handle) {
                duplicate = true;
            }
        }
        if ((RenderHandleUtil::GetHandleType(handle) == RenderHandleType::RENDER_NODE_GRAPH) && (!duplicate)) {
            rngs.push_back(handle);
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        if (duplicate) {
            PLUGIN_LOG_ONCE_E("renderer_rf_duplicate_rng",
                "RENDER_VALIDATION: duplicate render node graphs are not supported (idx: %u, id: %" PRIx64 ")",
                static_cast<uint32_t>(iIdx), handle.id);
        }
#endif
    }
    device_.SetRenderFrameRunning(true);
    // NOTE: this is the only place from which RenderFrameImpl is called
    RenderFrameImpl(rngs);
    device_.SetRenderFrameRunning(false);

    return renderStatus_.frontEndIndex;
}
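
// Typical application-side usage (sketch; the graph handle is assumed to come
// from IRenderNodeGraphManager::Create()):
//
//   const RenderHandleReference graphs[] = { myRenderNodeGraph };
//   renderContext->GetRenderer().RenderFrame(graphs);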

uint64_t Renderer::RenderDeferred(const array_view<const RenderHandleReference> renderNodeGraphs)
{
    const auto lock = std::lock_guard(deferredMutex_);
    for (const auto& ref : renderNodeGraphs) {
        deferredRenderNodeGraphs_.push_back(ref);
    }
    return renderStatusDeferred_ + 1;
}

uint64_t Renderer::RenderDeferredFrame()
{
    deferredMutex_.lock();
    decltype(deferredRenderNodeGraphs_) renderNodeGraphs = move(deferredRenderNodeGraphs_);
    renderStatusDeferred_ = renderStatus_.frontEndIndex + 1;
    deferredMutex_.unlock();
    RenderFrame(renderNodeGraphs);

    return renderStatus_.frontEndIndex;
}
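
// Deferred usage sketch: graphs queued with RenderDeferred() are all rendered
// by the next RenderDeferredFrame() call:
//
//   renderer.RenderDeferred(graphsA); // e.g. queued from different callers
//   renderer.RenderDeferred(graphsB);
//   renderer.RenderDeferredFrame();   // renders A and B in a single frame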

void Renderer::ExecuteRenderNodes(const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
#if (RENDER_PERF_ENABLED == 1)
    RENDER_CPU_PERF_BEGIN(fullExecuteCpuTimer, "RenderFrame", "ExecuteAllNodes");

    size_t allRenderNodeCount = 0;
    for (size_t graphIdx = 0; graphIdx < renderNodeGraphNodeStores.size(); ++graphIdx) {
        allRenderNodeCount += renderNodeGraphNodeStores[graphIdx]->renderNodeData.size();
    }

    vector<NodeTimerData> nodeTimers(allRenderNodeCount);
#endif

    ITaskQueue* queue = nullptr;
    if ((!forceSequentialQueue_) && device_.AllowThreadedProcessing()) {
        queue = parallelQueue_.get();
    } else {
        queue = sequentialQueue_.get();
    }
    if (!queue) {
        return; // fatal
    }

    gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::RENDER_PRE_EXECUTE);
    // single-threaded gpu resource creation with render nodes
    RenderNodePreExecution(renderNodeGraphNodeStores);

    // lock staging data for this frame
    // NOTE: should be done with double buffering earlier
    gpuResourceMgr_.LockFrameStagingData();
    // final gpu resource allocation and deallocation before render node execute
    device_.Activate();
    gpuResourceMgr_.HandlePendingAllocations(true);
    gpuResourceMgr_.MapRenderTimeGpuBuffers();
    device_.Deactivate();

    // process render node graph render node share preparations
    for (auto& ref : renderNodeGraphNodeStores) {
        ref->renderNodeGraphShareDataMgr->PrepareExecuteFrame();
    }

    // lock global descriptor set creation
    device_.GetDescriptorSetManager().LockFrameCreation();
    gpuResourceMgr_.SetState(GpuResourceManager::RenderTimeState::RENDER_EXECUTE);

    RenderNodeExecutionParameters params = {
        renderNodeGraphNodeStores,
#if (RENDER_PERF_ENABLED == 1)
        nodeTimers,
#endif
        queue,
        renderDataStoreMgr_,
        shaderMgr_,
    };

    // multi-threaded render node execution
    RenderNodeExecution(params);

    // Remove tasks.
    queue->Clear();

    // final gpu resource allocation before render graph
    device_.Activate();
    gpuResourceMgr_.UnmapRenderTimeGpuBuffers();
    // do not allow destruction here
    gpuResourceMgr_.HandlePendingAllocations(false);
    device_.Deactivate();

#if (RENDER_PERF_ENABLED == 1)
    RENDER_CPU_PERF_END(fullExecuteCpuTimer);

    if (auto* inst = GetInstance<IPerformanceDataManagerFactory>(UID_PERFORMANCE_FACTORY); inst) {
        if (IPerformanceDataManager* perfData = inst->Get("RenderNode"); perfData) {
            for (size_t nodeIdx = 0; nodeIdx < nodeTimers.size(); ++nodeIdx) {
                const auto& timerRef = nodeTimers[nodeIdx];
                perfData->UpdateData(timerRef.debugName, "RenderNodeExecute_Cpu", timerRef.timer.GetMicroseconds());
            }
        }
    }
#endif
}

uint64_t Renderer::RenderFrameBackend(const RenderFrameBackendInfo& info)
{
    if (separatedRendering_.separateBackend) {
        RenderFrameBackendImpl();
    } else {
        PLUGIN_LOG_E("RenderFrameBackend called separately even though the render context was not created as separate");
    }

    return renderStatus_.backEndIndex;
}

uint64_t Renderer::RenderFramePresent(const RenderFramePresentInfo& info)
{
    if (separatedRendering_.separatePresent) {
        RenderFramePresentImpl();
    } else {
        PLUGIN_LOG_E("RenderFramePresent called separately even though the render context was not created as separate");
    }

    return renderStatus_.presentIndex;
}

IRenderer::RenderStatus Renderer::GetFrameStatus() const
{
    return renderStatus_;
}

void Renderer::FillRngInputs(
    const array_view<const RenderHandle> renderNodeGraphInputList, vector<RenderHandle>& rngInputs)
{
    constexpr size_t defaultRenderNodeGraphCount = 2;
    rngInputs.reserve(renderNodeGraphInputList.size() + defaultRenderNodeGraphCount);
    rngInputs.push_back(defaultStagingRng_.GetHandle());
    rngInputs.append(renderNodeGraphInputList.begin().ptr(), renderNodeGraphInputList.end().ptr());
    rngInputs.push_back(defaultEndFrameStagingRng_.GetHandle());
}

void Renderer::ProcessTimeStampEnd()
{
    frameTimes_.end = GetTimeStampNow();

    // enforce a monotonically non-decreasing order for the timestamps
    // (backend/present stamps remain zero when those stages were skipped)
    int64_t finalTime = frameTimes_.begin;
    finalTime = Math::max(finalTime, frameTimes_.beginBackend);
    frameTimes_.beginBackend = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.endBackend);
    frameTimes_.endBackend = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.beginBackendPresent);
    frameTimes_.beginBackendPresent = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.endBackendPresent);
    frameTimes_.endBackendPresent = finalTime;

    finalTime = Math::max(finalTime, frameTimes_.end);
    frameTimes_.end = finalTime;

    PLUGIN_ASSERT(frameTimes_.end >= frameTimes_.endBackend);
    PLUGIN_ASSERT(frameTimes_.endBackend >= frameTimes_.beginBackend);
    PLUGIN_ASSERT(frameTimes_.beginBackendPresent >= frameTimes_.beginBackend);
    PLUGIN_ASSERT(frameTimes_.endBackendPresent >= frameTimes_.beginBackendPresent);

    renderUtil_.SetRenderTimings(frameTimes_);
    frameTimes_ = {};
}

void Renderer::Tick()
{
    using namespace std::chrono;
    const auto currentTime =
        static_cast<uint64_t>(duration_cast<microseconds>(high_resolution_clock::now().time_since_epoch()).count());

    if (firstTime_ == ~0u) {
        previousFrameTime_ = firstTime_ = currentTime;
    }
    deltaTime_ = currentTime - previousFrameTime_;
    constexpr auto limitHz = duration_cast<microseconds>(duration<float, std::ratio<1, 15u>>(1)).count();
    if (deltaTime_ > limitHz) {
        deltaTime_ = limitHz; // clamp the time step to at most 1/15 s (15 Hz)
    }
    previousFrameTime_ = currentTime;
}
RENDER_END_NAMESPACE()