• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "render_post_process_upscale_node.h"
17 
18 #include <base/containers/unique_ptr.h>
19 #include <base/math/matrix_util.h>
20 #include <core/log.h>
21 #include <core/property/property_handle_util.h>
22 #include <core/property_tools/property_api_impl.inl>
23 #include <render/device/intf_gpu_resource_manager.h>
24 #include <render/nodecontext/intf_node_context_descriptor_set_manager.h>
25 #include <render/nodecontext/intf_node_context_pso_manager.h>
26 #include <render/nodecontext/intf_render_command_list.h>
27 #include <render/nodecontext/intf_render_node_context_manager.h>
28 #include <render/nodecontext/intf_render_node_parser_util.h>
29 #include <render/nodecontext/intf_render_node_util.h>
30 #include <render/property/property_types.h>
31 
32 #include "default_engine_constants.h"
33 #include "postprocesses/render_post_process_upscale.h"
34 #include "util/log.h"
35 
36 // shaders
37 #include <render/shaders/common/render_post_process_structs_common.h>
38 
39 using namespace BASE_NS;
40 using namespace CORE_NS;
41 using namespace RENDER_NS;
42 
43 CORE_BEGIN_NAMESPACE()
44 DATA_TYPE_METADATA(RenderPostProcessUpscaleNode::NodeInputs, MEMBER_PROPERTY(input, "input", 0),
45     MEMBER_PROPERTY(depth, "depth", 0), MEMBER_PROPERTY(velocity, "velocity", 0))
46 DATA_TYPE_METADATA(RenderPostProcessUpscaleNode::NodeOutputs, MEMBER_PROPERTY(output, "output", 0))
CORE_END_NAMESPACE()47 CORE_END_NAMESPACE()
48 
49 RENDER_BEGIN_NAMESPACE()
50 
51 RenderPostProcessUpscaleNode::RenderPostProcessUpscaleNode()
52     : inputProperties_(
53           &nodeInputsData, array_view(PropertyType::DataType<RenderPostProcessUpscaleNode::NodeInputs>::properties)),
54       outputProperties_(
55           &nodeOutputsData, array_view(PropertyType::DataType<RenderPostProcessUpscaleNode::NodeOutputs>::properties))
56 
57 {}
58 
GetRenderInputProperties()59 IPropertyHandle* RenderPostProcessUpscaleNode::GetRenderInputProperties()
60 {
61     return inputProperties_.GetData();
62 }
63 
GetRenderOutputProperties()64 IPropertyHandle* RenderPostProcessUpscaleNode::GetRenderOutputProperties()
65 {
66     return outputProperties_.GetData();
67 }
68 
GetRenderDescriptorCounts() const69 DescriptorCounts RenderPostProcessUpscaleNode::GetRenderDescriptorCounts() const
70 {
71     return DescriptorCounts { {
72         { CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 32u },
73         { CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 32u },
74         { CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE, 32u },
75         { CORE_DESCRIPTOR_TYPE_SAMPLER, 24u },
76     } };
77 }
78 
SetRenderAreaRequest(const RenderAreaRequest & renderAreaRequest)79 void RenderPostProcessUpscaleNode::SetRenderAreaRequest(const RenderAreaRequest& renderAreaRequest)
80 {
81     useRequestedRenderArea_ = true;
82     renderAreaRequest_ = renderAreaRequest;
83 }
84 
Init(const IRenderPostProcess::Ptr & postProcess,IRenderNodeContextManager & renderNodeContextMgr)85 void RenderPostProcessUpscaleNode::Init(
86     const IRenderPostProcess::Ptr& postProcess, IRenderNodeContextManager& renderNodeContextMgr)
87 {
88     renderNodeContextMgr_ = &renderNodeContextMgr;
89     postProcess_ = postProcess;
90     renderCopyOutput_.Init(renderNodeContextMgr);
91 
92     auto& gpuResourceMgr = renderNodeContextMgr_->GetGpuResourceManager();
93     samplerHandle_ = gpuResourceMgr.Create(samplerHandle_,
94         GpuSamplerDesc {
95             Filter::CORE_FILTER_LINEAR,                                  // magFilter
96             Filter::CORE_FILTER_LINEAR,                                  // minFilter
97             Filter::CORE_FILTER_LINEAR,                                  // mipMapMode
98             SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeU
99             SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeV
100             SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeW
101         });
102     SetCameraData();
103 
104     valid_ = true;
105 }
106 
PreExecute()107 void RenderPostProcessUpscaleNode::PreExecute()
108 {
109     if (valid_ && postProcess_) {
110         const array_view<const uint8_t> propertyView = postProcess_->GetData();
111         // this node is directly dependant
112         PLUGIN_ASSERT(propertyView.size_bytes() == sizeof(RenderPostProcessUpscaleNode::EffectProperties));
113         if (propertyView.size_bytes() == sizeof(RenderPostProcessUpscaleNode::EffectProperties)) {
114             effectProperties_ = (const RenderPostProcessUpscaleNode::EffectProperties&)(*propertyView.data());
115         }
116         effectProperties_.ratio = 1.5f;
117         const GpuImageDesc& imgDesc =
118             renderNodeContextMgr_->GetGpuResourceManager().GetImageDescriptor(nodeInputsData.input.handle);
119 
120         mipLevels_ = Math::max(1U, TARGET_COUNT);
121         // floor(log2(Math::max(imgDesc.width, imgDesc.height)));
122         CreateTargets(Math::UVec2(imgDesc.width, imgDesc.height));
123         if (effectProperties_.enabled) {
124             // check input and output
125             EvaluateOutput();
126         }
127     } else {
128         effectProperties_.enabled = false;
129     }
130 }
131 
GetExecuteFlags() const132 IRenderNode::ExecuteFlags RenderPostProcessUpscaleNode::GetExecuteFlags() const
133 {
134     if (effectProperties_.enabled) {
135         return 0;
136     } else {
137         return IRenderNode::ExecuteFlagBits::EXECUTE_FLAG_BITS_DO_NOT_EXECUTE;
138     }
139 }
140 
Execute(IRenderCommandList & cmdList)141 void RenderPostProcessUpscaleNode::Execute(IRenderCommandList& cmdList)
142 {
143     if (!valid_) {
144         return;
145     }
146     // NOTE: target counts etc. should probably be resized based on configuration
147 
148     CreatePsos();
149 
150     EvaluateOutput();
151     BindableImage currOutput = nodeOutputsData.output;
152     if (!RenderHandleUtil::IsValid(currOutput.handle)) {
153         return;
154     }
155 
156     constexpr PushConstant pc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
157         sizeof(LocalPostProcessPushConstantStruct) };
158     // update the output
159     nodeOutputsData.output = currOutput;
160     RENDER_DEBUG_MARKER_SCOPE(cmdList, "Upscaling (LSR)");
161     {
162         RENDER_DEBUG_MARKER_COL_SCOPE(
163             cmdList, "Compute Luminance Hierarchy", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
164         ComputeLuminancePyramid(pc, cmdList);
165     }
166     {
167         RENDER_DEBUG_MARKER_COL_SCOPE(
168             cmdList, "Dilate and reconstruct", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
169         ComputeReconstructAndDilate(pc, cmdList);
170     }
171     {
172         RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "Depth clip", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
173         ComputeDepthClip(pc, cmdList);
174     }
175     {
176         constexpr PushConstant lockPassPc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
177             sizeof(LockPassPushConstant) };
178         RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "Create Locks", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
179         ComputeCreateLocks(lockPassPc, cmdList);
180     }
181     {
182         constexpr PushConstant accumulatePassPc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
183             sizeof(AccumulatePassPushConstant) };
184         RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "Accumulate", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
185         ComputeAccumulate(accumulatePassPc, cmdList);
186     }
187 
188     if (targets_.rcas_enabled) {
189         RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "RCAS", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
190         ComputeRcas(pc, cmdList);
191     }
192 
193     // Toggle between history and current motion vector texture
194     targets_.motionVectorIdx ^= 1;
195     targets_.historyBufferIdx ^= 1;
196     IRenderNodeCopyUtil::CopyInfo copyInfo;
197     BindableImage finalColor;
198 
199     targets_.rcas_enabled ? finalColor.handle = targets_.rcas_final.GetHandle()
200                           : finalColor.handle = targets_.finalColor.GetHandle();
201     copyInfo.input = finalColor;
202     copyInfo.output = nodeOutputsData.output;
203     renderCopyOutput_.Execute(cmdList, copyInfo);
204 }
205 
SetCameraData()206 void RenderPostProcessUpscaleNode::SetCameraData()
207 {
208     // change this
209     // Temporary workaround to send camera data from 3D to Render
210     RenderHandle cameraUbo = renderNodeContextMgr_->GetGpuResourceManager().GetBufferHandle(
211         "RenderDataStoreDefaultSceneCORE3D_DM_CAM_DATA_BUF");
212     targets_.cameraUbo = cameraUbo;
213 }
214 
ComputeLuminancePyramid(const PushConstant & pc,IRenderCommandList & cmdList)215 void RenderPostProcessUpscaleNode::ComputeLuminancePyramid(const PushConstant& pc, IRenderCommandList& cmdList)
216 {
217     // bind downscale pso
218     cmdList.BindPipeline(psos_.luminanceDownscale);
219     const ShaderThreadGroup tgs = psos_.luminanceDownscaleTGS;
220 
221     if (!RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
222         return;
223     }
224     //-----------------------------------------------------------
225     // Pass #1: generate luminance texture mip 0 by sampling the original input
226     //-----------------------------------------------------------
227     {
228         auto& binder = binders_.luminanceDownscale;
229         binder->ClearBindings();
230 
231         binder->BindImage(0U, { targets_.tex1[0].GetHandle() });
232         binder->BindImage(1U, { nodeInputsData.input.handle });
233         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
234 
235         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
236         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
237 
238         const auto targetSize = targets_.tex1Size[0];
239         LocalPostProcessPushConstantStruct uPc;
240         uPc.viewportSizeInvSize = Math::Vec4(
241             float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
242 
243         cmdList.PushConstantData(pc, arrayviewU8(uPc));
244         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
245     }
246 
247     // bind hierarchy pso
248     cmdList.BindPipeline(psos_.luminancePyramid);
249     const ShaderThreadGroup PyramidTgs = psos_.luminancePyramidTGS;
250     ////-----------------------------------------------------------
251     //// Pass #2..N: generate each subsequent mip from the previous
252     ////-----------------------------------------------------------
253     for (size_t i = 1; i < mipLevels_; ++i) {
254         {
255             auto& binder = binders_.luminancePyramid[i];
256             const RenderHandle setHandle = binder->GetDescriptorSetHandle();
257             binder->ClearBindings();
258 
259             binder->BindImage(0U, { targets_.tex1[i].GetHandle() });
260             binder->BindImage(1U, { targets_.tex1[i - 1].GetHandle() });
261             binder->BindSampler(2U, { samplerHandle_.GetHandle() });
262 
263             cmdList.UpdateDescriptorSet(
264                 binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
265             cmdList.BindDescriptorSet(0U, setHandle);
266         }
267 
268         const auto targetSize = targets_.tex1Size[i];
269 
270         LocalPostProcessPushConstantStruct uPc;
271         uPc.viewportSizeInvSize = Math::Vec4(
272             float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
273         cmdList.PushConstantData(pc, arrayviewU8(uPc));
274 
275         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
276     }
277 }
278 
ComputeReconstructAndDilate(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)279 void RenderPostProcessUpscaleNode::ComputeReconstructAndDilate(
280     const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
281 {
282     cmdList.BindPipeline(psos_.reconstructAndDilate);
283     const ShaderThreadGroup& tgs = psos_.reconstructAndDilateTGS;
284 
285     const uint32_t motionVecIdx = targets_.motionVectorIdx;
286 
287     auto& binder = binders_.reconstructAndDilate;
288     if (!binder)
289         return;
290     {
291         binder->ClearBindings();
292 
293         //  Bindings
294         //  - 0: uPreviousDepth (R32_UINT)
295         //  - 1: DilatedDepth  (R16_UINT)
296         //  - 2: sampler
297         //  - 3: DilatedMotion (R16G16F)
298         //  - 4: LockInputLuma (R16F)
299         //  - 5: Depth         (sampler2D)
300         //  - 6: Velocity
301         //  - 7: Color
302         binder->BindImage(0U, { targets_.estPrevDepth.GetHandle() });
303         binder->BindImage(1U, { targets_.dilatedDepth.GetHandle() });
304         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
305         binder->BindImage(3U, { targets_.dilatedMotionVectors[motionVecIdx].GetHandle() });
306         binder->BindImage(4U, { targets_.lockInputLuma.GetHandle() });
307         binder->BindImage(5U, { nodeInputsData.depth.handle });
308         binder->BindImage(6U, { nodeInputsData.velocity.handle });
309         binder->BindImage(7U, { nodeInputsData.input.handle });
310 
311         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
312         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
313 
314         const auto targetSize = targets_.renderResolution;
315         LocalPostProcessPushConstantStruct uPc;
316         uPc.viewportSizeInvSize = Math::Vec4(
317             float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
318         cmdList.PushConstantData(pc, arrayviewU8(uPc));
319 
320         // Dispatch
321         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
322     }
323 }
324 
ComputeDebug(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)325 void RenderPostProcessUpscaleNode::ComputeDebug(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
326 {
327     // bind pso
328     cmdList.BindPipeline(psos_.debugPass);
329     const ShaderThreadGroup tgs = psos_.debugPassTGS;
330 
331     auto& binder = binders_.debugPass;
332     if (!binder) {
333         return;
334     }
335     if (!RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
336         return;
337     }
338     {
339         binder->ClearBindings();
340 
341         binder->BindImage(0U, { targets_.estPrevDepth.GetHandle() });
342         binder->BindImage(1U, { targets_.debugImage.GetHandle() });
343         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
344 
345         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
346         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
347 
348         const auto targetSize = targets_.renderResolution;
349 
350         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
351     }
352 }
353 
ComputeDepthClip(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)354 void RenderPostProcessUpscaleNode::ComputeDepthClip(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
355 {
356     // bind pso
357     cmdList.BindPipeline(psos_.depthClipPass);
358     const ShaderThreadGroup tgs = psos_.depthClipPassTGS;
359 
360     const uint32_t curMotionVecIdx = targets_.motionVectorIdx;
361     const uint32_t prevMotionVecIdx = curMotionVecIdx ^ 1;
362 
363     auto& binder = binders_.depthClipPass;
364     if (!binder) {
365         return;
366     }
367     {
368         binder->ClearBindings();
369 
370         //  Bindings
371         //  - 0: estPrevDepth (R32_UINT)
372         //  - 1: DilatedDepth  (R16_UINT)
373         //  - 2: sampler
374         //  - 3: DilatedMotion (R16G16F)
375         //  - 5: Input Color (sampler2D)
376         //  - 6: Depth
377         //  - 7: Velocity
378         //  - 7: Adjusted Color output (R16G16B16A16F)
379         //  - 8: Dilated reactive masks output (R16G16F)
380         binder->BindImage(0U, { targets_.estPrevDepth.GetHandle() });
381         binder->BindImage(1U, { targets_.dilatedDepth.GetHandle() });
382         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
383         binder->BindImage(3U, { targets_.dilatedMotionVectors[curMotionVecIdx].GetHandle() });
384         binder->BindImage(4U, { targets_.dilatedMotionVectors[prevMotionVecIdx].GetHandle() });
385 
386         binder->BindImage(5U, { nodeInputsData.input.handle });
387         binder->BindImage(6U, { nodeInputsData.velocity.handle });
388         binder->BindImage(7U, { targets_.adjustedColorBuffer.GetHandle() });
389         binder->BindImage(8U, { targets_.dilatedReactiveMask.GetHandle() });
390 
391         // Bind camera UBO
392         binder->BindBuffer(12U, targets_.cameraUbo, 0);
393 
394         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
395         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
396 
397         const auto targetSize = targets_.renderResolution;
398         LocalPostProcessPushConstantStruct uPc;
399         uPc.viewportSizeInvSize = Math::Vec4(
400             float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
401         cmdList.PushConstantData(pc, arrayviewU8(uPc));
402 
403         // Dispatch
404         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
405     }
406 }
407 
ComputeCreateLocks(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)408 void RenderPostProcessUpscaleNode::ComputeCreateLocks(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
409 {
410     // bind pso
411     cmdList.BindPipeline(psos_.locksPass);
412     const ShaderThreadGroup tgs = psos_.locksPassTGS;
413 
414     auto& binder = binders_.locksPass;
415     if (!binder) {
416         return;
417     }
418     if (!RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
419         return;
420     }
421     {
422         binder->ClearBindings();
423 
424         binder->BindImage(0U, { targets_.lockInputLuma.GetHandle() });
425         binder->BindImage(1U, { targets_.newLocksMask.GetHandle() });
426         binder->BindImage(3U, { targets_.estPrevDepth.GetHandle() });
427         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
428 
429         // Bind camera UBO
430         binder->BindBuffer(12U, targets_.cameraUbo, 0);
431 
432         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
433         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
434 
435         const auto renderSize = targets_.renderResolution;
436         const auto displaySize = targets_.displayResolution;
437 
438         LockPassPushConstant uPc;
439         uPc.renderSizeInvSize = Math::Vec4(
440             float(renderSize.x), float(renderSize.y), 1.0f / float(renderSize.x), 1.0f / float(renderSize.y));
441         uPc.displaySizeInvSize = Math::Vec4(
442             float(displaySize.x), float(displaySize.y), 1.0f / float(displaySize.x), 1.0f / float(displaySize.y));
443 
444         cmdList.PushConstantData(pc, arrayviewU8(uPc));
445         cmdList.Dispatch((renderSize.x + tgs.x - 1) / tgs.x, (renderSize.y + tgs.y - 1) / tgs.y, 1);
446     }
447 }
448 
ComputeAccumulate(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)449 void RenderPostProcessUpscaleNode::ComputeAccumulate(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
450 {
451     // bind pso
452     cmdList.BindPipeline(psos_.accumulatePass);
453     const ShaderThreadGroup tgs = psos_.accumulatePassTGS;
454 
455     auto& binder = binders_.accumulatePass;
456     if (!binder) {
457         return;
458     }
459 
460     uint32_t readHistoryIdx = targets_.historyBufferIdx ^ 1;
461     uint32_t writeHistoryIdx = targets_.historyBufferIdx;
462     {
463         binder->ClearBindings();
464         // Bindings
465         // - 0: Adjusted Color + Depth Clip (rgba16f)
466         // - 1: Dilated Reactive Mask (rg16f)
467         // - 2: Sampler
468         // - 3: New Locks Mask (r8ui)
469         // - 4: Dilated Motion Vectors (rg16f)
470 
471         // Input History Textures (Previous Frame Data - Display Resolution)
472         // - 5: History Color + Reactive Alpha (rgba16f)
473         // - 6: History Lock Status (rg16f)
474         // - 7: History Luma (rgba16f)
475 
476         // Output History Textures (Current Frame Data - Display Resolution)
477         // - 8: History Color + Reactive Alpha (rgba16f)
478         // - 9: History Lock Status (rg16f)
479         // - 10: History Luma (rgba16f)
480 
481         // Final Output Texture
482         // - 11: Output Color (rgba16f) - Final Result [WRITE]
483         // - 13: Luminance pyramid last mip (avg exposure)
484         binder->BindImage(0U, { targets_.adjustedColorBuffer.GetHandle() });
485         binder->BindImage(1U, { targets_.dilatedReactiveMask.GetHandle() });
486         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
487 
488         binder->BindImage(3U, { targets_.newLocksMask.GetHandle() });
489         binder->BindImage(4U, { targets_.dilatedMotionVectors[targets_.motionVectorIdx].GetHandle() });
490         // (Sampled Image)
491 
492         // --- History Buffers (Display Resolution) ---
493         binder->BindImage(5U, { targets_.historyColorAndReactive[readHistoryIdx].GetHandle() });
494         binder->BindImage(6U, { targets_.historyLockStatus[readHistoryIdx].GetHandle() });
495         binder->BindImage(7U, { targets_.historyLuma[readHistoryIdx].GetHandle() });
496 
497         // Write current frame's history (Storage Image)
498         binder->BindImage(8U, { targets_.historyColorAndReactive[writeHistoryIdx].GetHandle() });
499         binder->BindImage(9U, { targets_.historyLockStatus[writeHistoryIdx].GetHandle() });
500         binder->BindImage(10U, { targets_.historyLuma[writeHistoryIdx].GetHandle() });
501 
502         binder->BindImage(11U, { targets_.finalColor.GetHandle() });
503 
504         // Bind camera UBO
505         binder->BindBuffer(12U, targets_.cameraUbo, 0);
506 
507         // Bind luminance pyramid texture level 4
508         binder->BindImage(13U, targets_.tex1[mipLevels_ - 1].GetHandle());
509 
510         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
511         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
512 
513         const auto renderSize = targets_.renderResolution;
514         const auto displaySize = targets_.displayResolution;
515         AccumulatePassPushConstant uPc;
516         uPc.displaySizeInvSize = Math::Vec4(
517             float(displaySize.x), float(displaySize.y), 1.0f / float(displaySize.x), 1.0f / float(displaySize.y));
518         uPc.viewportSizeInvSize = Math::Vec4(
519             float(renderSize.x), float(renderSize.y), 1.0f / float(renderSize.x), 1.0f / float(renderSize.y));
520 
521         uPc.frameIndex = 1;
522 
523         uPc.jitterSequenceLength = 16; // 16: length
524         if (uPc.jitterSequenceLength > 0) {
525             uPc.avgLanczosWeightPerFrame = 1.0f / float(uPc.jitterSequenceLength);
526         } else {
527             uPc.avgLanczosWeightPerFrame = 1.0f;
528         }
529         uPc.maxAccumulationLanczosWeight = 0.98f;
530 
531         cmdList.PushConstantData(pc, arrayviewU8(uPc));
532         cmdList.Dispatch((displaySize.x + tgs.x - 1) / tgs.x, (displaySize.y + tgs.y - 1) / tgs.y, 1);
533     }
534 }
535 
ComputeRcas(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)536 void RenderPostProcessUpscaleNode::ComputeRcas(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
537 {
538     // bind pso
539     cmdList.BindPipeline(psos_.rcasPass);
540     const ShaderThreadGroup tgs = psos_.rcasPassTGS;
541 
542     auto& binder = binders_.rcasPass;
543     if (!binder) {
544         return;
545     }
546     {
547         binder->ClearBindings();
548 
549         binder->BindImage(0U, { targets_.finalColor.GetHandle() });
550         binder->BindImage(1U, { targets_.rcas_final.GetHandle() });
551         binder->BindSampler(2U, { samplerHandle_.GetHandle() });
552 
553         cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
554         cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
555 
556         const auto targetSize = targets_.displayResolution;
557 
558         LocalPostProcessPushConstantStruct uPc;
559         uPc.viewportSizeInvSize = Math::Vec4(
560             float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
561         // .x = exposure, .y = pre exposure, .z = sharpness, .w = 0
562         // Sharpness : 0 = max, 1 = -1 stop, 2 = -2 stops
563         float exposure = 0.7f;
564         float preExposure = 1.0f;
565         float sharpness = 0.0f;
566         uPc.factor = Math::Vec4(exposure, preExposure, sharpness, 0.0f);
567         cmdList.PushConstantData(pc, arrayviewU8(uPc));
568 
569         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
570     }
571 }
572 
CreateTargets(const BASE_NS::Math::UVec2 baseSize)573 void RenderPostProcessUpscaleNode::CreateTargets(const BASE_NS::Math::UVec2 baseSize)
574 {
575     if (baseSize.x != baseSize_.x || baseSize.y != baseSize_.y) {
576         baseSize_ = baseSize;
577         // We only store the luminance value in texture
578         ImageUsageFlags usageFlags =
579             CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT | CORE_IMAGE_USAGE_TRANSFER_SRC_BIT;
580 
581         targets_.renderResolution = baseSize;
582         const uint32_t dSizeX = uint32_t(float(baseSize.x) * effectProperties_.ratio);
583         const uint32_t dSizeY = uint32_t(float(baseSize.y) * effectProperties_.ratio);
584 
585         targets_.displayResolution = BASE_NS::Math::UVec2(dSizeX, dSizeY); // effectProperties_.ratio);
586 
587         // create target image
588         const Math::UVec2 startTargetSize = baseSize;
589         GpuImageDesc desc {
590             ImageType::CORE_IMAGE_TYPE_2D,
591             ImageViewType::CORE_IMAGE_VIEW_TYPE_2D,
592             Format::BASE_FORMAT_R16_SFLOAT,
593             ImageTiling::CORE_IMAGE_TILING_OPTIMAL,
594             usageFlags,
595             MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
596             0,
597             EngineImageCreationFlagBits::CORE_ENGINE_IMAGE_CREATION_DYNAMIC_BARRIERS,
598             startTargetSize.x,
599             startTargetSize.y,
600             1u,
601             1u,
602             1u,
603             SampleCountFlagBits::CORE_SAMPLE_COUNT_1_BIT,
604             {},
605         };
606 
607         auto& gpuResourceMgr = renderNodeContextMgr_->GetGpuResourceManager();
608 #if (RENDER_VALIDATION_ENABLED == 1)
609         const string_view nodeName = renderNodeContextMgr_->GetName();
610 #endif
611         for (size_t idx = 0; idx < targets_.tex1.size(); ++idx) {
612             // every upscale target is half the size of the original/ previous upscale target
613             desc.width /= 2U;
614             desc.height /= 2U;
615             desc.width = (desc.width >= 1U) ? desc.width : 1U;
616             desc.height = (desc.height >= 1U) ? desc.height : 1U;
617             targets_.tex1Size[idx] = Math::UVec2(desc.width, desc.height);
618 #if (RENDER_VALIDATION_ENABLED == 1)
619             const auto baseTargetName = nodeName + "_lsr_luminance_Mip" + to_string(idx);
620             targets_.tex1[idx] = gpuResourceMgr.Create(baseTargetName, desc);
621 #else
622             targets_.tex1[idx] = gpuResourceMgr.Create(desc);
623 #endif
624         }
625 
626         // Dilate and reconstruct targets at render resolution
627         desc.width = baseSize.x;
628         desc.height = baseSize.y;
629         desc.usageFlags = CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT;
630 
631         // Create dilated depth
632         {
633             desc.format = Format::BASE_FORMAT_R16_UINT;
634 #if (RENDER_VALIDATION_ENABLED == 1)
635             const auto baseTargetName = nodeName + "_lsr_dilated_depth";
636             targets_.dilatedDepth = gpuResourceMgr.Create(baseTargetName, desc);
637 #else
638             targets_.dilatedDepth = gpuResourceMgr.Create(desc);
639 #endif
640         }
641 
642         // Create dilated motion vectors
643 
644         desc.format = Format::BASE_FORMAT_R16G16_SFLOAT;
645         for (int i = 0; i < 2U; ++i) {
646 #if (RENDER_VALIDATION_ENABLED == 1)
647             const auto name = nodeName + "_lsr_dilated_motion_vector_" + ((i == 0) ? "A" : "B");
648             targets_.dilatedMotionVectors[i] = gpuResourceMgr.Create(name, desc);
649 #else
650             targets_.dilatedMotionVectors[i] = gpuResourceMgr.Create(desc);
651 #endif
652         }
653 
654         // Create lock input luma
655         {
656             desc.format = Format::BASE_FORMAT_R16_SFLOAT;
657 #if (RENDER_VALIDATION_ENABLED == 1)
658             const auto baseTargetName = nodeName + "_lsr_lock_input_luma";
659             targets_.lockInputLuma = gpuResourceMgr.Create(baseTargetName, desc);
660 #else
661             targets_.lockInputLuma = gpuResourceMgr.Create(desc);
662 #endif
663         }
664 
665         // Create previous depth for the current frame
666         {
667             desc.format = Format::BASE_FORMAT_R32_UINT;
668 #if (RENDER_VALIDATION_ENABLED == 1)
669             const auto baseTargetName = nodeName + "_lsr_previous_depth";
670             targets_.estPrevDepth = gpuResourceMgr.Create(baseTargetName, desc);
671 #else
672             targets_.estPrevDepth = gpuResourceMgr.Create(desc);
673 #endif
674         }
675         // Debug depth map texture
676         {
677             desc.format = Format::BASE_FORMAT_R32_SFLOAT;
678 #if (RENDER_VALIDATION_ENABLED == 1)
679             const auto baseTargetName = nodeName + "_lsr_debug_depth";
680             targets_.debugImage = gpuResourceMgr.Create(baseTargetName, desc);
681 #else
682             targets_.debugImage = gpuResourceMgr.Create(desc);
683 #endif
684         }
685         {
686             desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
687 #if (RENDER_VALIDATION_ENABLED == 1)
688             const auto baseTargetName = nodeName + "_lsr_adjusted_color";
689             targets_.adjustedColorBuffer = gpuResourceMgr.Create(baseTargetName, desc);
690 #else
691             targets_.adjustedColorBuffer = gpuResourceMgr.Create(desc);
692 #endif
693         }
694         // Dilated reactive mask texture
695         {
696             desc.format = Format::BASE_FORMAT_R16G16_SFLOAT;
697 #if (RENDER_VALIDATION_ENABLED == 1)
698             const auto baseTargetName = nodeName + "_lsr_reactive_mask";
699             targets_.dilatedReactiveMask = gpuResourceMgr.Create(baseTargetName, desc);
700 #else
701             targets_.dilatedReactiveMask = gpuResourceMgr.Create(desc);
702 #endif
703         }
704         // Create locks mask texture
705         {
706             desc.width = targets_.displayResolution.x;
707             desc.height = targets_.displayResolution.y;
708             desc.format = Format::BASE_FORMAT_R8_UNORM;
709 #if (RENDER_VALIDATION_ENABLED == 1)
710             const auto baseTargetName = nodeName + "_lsr_new_locks_mask";
711             targets_.newLocksMask = gpuResourceMgr.Create(baseTargetName, desc);
712 #else
713             targets_.newLocksMask = gpuResourceMgr.Create(desc);
714 #endif
715         }
716 
717         // ---------- Create Accumulate Pass History Buffers (DISPLAY resolution) ----------
718         desc.width = targets_.displayResolution.x;
719         desc.height = targets_.displayResolution.y;
720         desc.usageFlags = CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT;
721 
722         // Color History (RGBA16F)
723         desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
724         for (int i = 0; i < 2U; ++i) {
725 #if (RENDER_VALIDATION_ENABLED == 1)
726             const auto name = nodeName + "_lsr_history_color_" + ((i == 0) ? "A" : "B");
727             targets_.historyColorAndReactive[i] = gpuResourceMgr.Create(name, desc);
728 #else
729             targets_.historyColorAndReactive[i] = gpuResourceMgr.Create(desc);
730 #endif
731         }
732 
733         // Lock Status History (RG16F)
734         desc.format = Format::BASE_FORMAT_R16G16_SFLOAT;
735         for (int i = 0; i < 2U; ++i) {
736 #if (RENDER_VALIDATION_ENABLED == 1)
737             const auto name = nodeName + "_lsr_history_lock_" + ((i == 0) ? "A" : "B");
738             targets_.historyLockStatus[i] = gpuResourceMgr.Create(name, desc);
739 #else
740             targets_.historyLockStatus[i] = gpuResourceMgr.Create(desc);
741 #endif
742         }
743 
744         // Luma History (RGBA8 UNORM)
745         desc.format = Format::BASE_FORMAT_R8G8B8A8_UNORM;
746         for (int i = 0; i < 2U; ++i) {
747 #if (RENDER_VALIDATION_ENABLED == 1)
748             const auto name = nodeName + "_lsr_history_luma_" + ((i == 0) ? "A" : "B");
749             targets_.historyLuma[i] = gpuResourceMgr.Create(name, desc);
750 #else
751             targets_.historyLuma[i] = gpuResourceMgr.Create(desc);
752 #endif
753         }
754 
755         {
756             desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
757 #if (RENDER_VALIDATION_ENABLED == 1)
758             const auto baseTargetName = nodeName + "_lsr_final_color";
759             targets_.finalColor = gpuResourceMgr.Create(baseTargetName, desc);
760 #else
761             targets_.finalColor = gpuResourceMgr.Create(desc);
762 #endif
763         }
764 
765         // Optional RCAS pass
766         {
767             desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
768 #if (RENDER_VALIDATION_ENABLED == 1)
769             const auto baseTargetName = nodeName + "_lsr_rcas_out_color";
770             targets_.rcas_final = gpuResourceMgr.Create(baseTargetName, desc);
771 #else
772             targets_.rcas_final = gpuResourceMgr.Create(desc);
773 #endif
774         }
775     }
776 }
777 
CreatePsos()778 void RenderPostProcessUpscaleNode::CreatePsos()
779 {
780     if (binders_.accumulatePass) {
781         return;
782     }
783 
784     const auto& shaderMgr = renderNodeContextMgr_->GetShaderManager();
785     INodeContextPsoManager& psoMgr = renderNodeContextMgr_->GetPsoManager();
786     INodeContextDescriptorSetManager& dSetMgr = renderNodeContextMgr_->GetDescriptorSetManager();
787 
788     // Luminance Hierarchy pass
789     {
790         const RenderHandle shader =
791             shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_luminance_downscale.shader");
792         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
793 
794         psos_.luminanceDownscale = psoMgr.GetComputePsoHandle(shader, pl, {});
795         psos_.luminanceDownscaleTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
796 
797         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
798         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
799         binders_.luminanceDownscale = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
800     }
801 
802     // Luminance Hierarchy pass
803     {
804         const RenderHandle shader =
805             shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_luminance_pyramid.shader");
806         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
807 
808         psos_.luminancePyramid = psoMgr.GetComputePsoHandle(shader, pl, {});
809         psos_.luminancePyramidTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
810 
811         constexpr uint32_t localSetIdx = 0U;
812         const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
813 
814         for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
815             binders_.luminancePyramid[idx] =
816                 dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
817         }
818     }
819     // Dilate and reconstruct pass
820     {
821         const RenderHandle shader =
822             shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_dilate_and_reconstruct.shader");
823         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
824 
825         psos_.reconstructAndDilate = psoMgr.GetComputePsoHandle(shader, pl, {});
826         psos_.reconstructAndDilateTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
827 
828         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
829         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
830         binders_.reconstructAndDilate = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
831     }
832 
833     // Debug depth map pass
834     {
835         const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_debug_depth.shader");
836         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
837 
838         psos_.debugPass = psoMgr.GetComputePsoHandle(shader, pl, {});
839         psos_.debugPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
840 
841         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
842         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
843         binders_.debugPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
844     }
845 
846     // Clipping pass
847     {
848         const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_depth_clip.shader");
849         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
850 
851         psos_.depthClipPass = psoMgr.GetComputePsoHandle(shader, pl, {});
852         psos_.depthClipPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
853 
854         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
855         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
856         binders_.depthClipPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
857     }
858 
859     // Create new locks
860     {
861         const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_lock.shader");
862         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
863 
864         psos_.locksPass = psoMgr.GetComputePsoHandle(shader, pl, {});
865         psos_.locksPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
866 
867         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
868         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
869         binders_.locksPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
870     }
871 
872     // Accumulate pass
873     {
874         const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_accumulate.shader");
875         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
876 
877         psos_.accumulatePass = psoMgr.GetComputePsoHandle(shader, pl, {});
878         psos_.accumulatePassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
879 
880         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
881         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
882         binders_.accumulatePass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
883     }
884 
885     // Rcas pass
886     {
887         const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_rcas.shader");
888         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
889 
890         psos_.rcasPass = psoMgr.GetComputePsoHandle(shader, pl, {});
891         psos_.rcasPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
892 
893         const auto& bindings = pl.descriptorSetLayouts[0].bindings;
894         RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
895         binders_.rcasPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
896     }
897 }
898 
EvaluateOutput()899 void RenderPostProcessUpscaleNode::EvaluateOutput()
900 {
901     if (RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
902         nodeOutputsData.output = nodeInputsData.input;
903     }
904 }
905 RENDER_END_NAMESPACE()
906