1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_post_process_upscale_node.h"
17
18 #include <base/containers/unique_ptr.h>
19 #include <base/math/matrix_util.h>
20 #include <core/log.h>
21 #include <core/property/property_handle_util.h>
22 #include <core/property_tools/property_api_impl.inl>
23 #include <render/device/intf_gpu_resource_manager.h>
24 #include <render/nodecontext/intf_node_context_descriptor_set_manager.h>
25 #include <render/nodecontext/intf_node_context_pso_manager.h>
26 #include <render/nodecontext/intf_render_command_list.h>
27 #include <render/nodecontext/intf_render_node_context_manager.h>
28 #include <render/nodecontext/intf_render_node_parser_util.h>
29 #include <render/nodecontext/intf_render_node_util.h>
30 #include <render/property/property_types.h>
31
32 #include "default_engine_constants.h"
33 #include "postprocesses/render_post_process_upscale.h"
34 #include "util/log.h"
35
36 // shaders
37 #include <render/shaders/common/render_post_process_structs_common.h>
38
39 using namespace BASE_NS;
40 using namespace CORE_NS;
41 using namespace RENDER_NS;
42
43 CORE_BEGIN_NAMESPACE()
44 DATA_TYPE_METADATA(RenderPostProcessUpscaleNode::NodeInputs, MEMBER_PROPERTY(input, "input", 0),
45 MEMBER_PROPERTY(depth, "depth", 0), MEMBER_PROPERTY(velocity, "velocity", 0))
46 DATA_TYPE_METADATA(RenderPostProcessUpscaleNode::NodeOutputs, MEMBER_PROPERTY(output, "output", 0))
CORE_END_NAMESPACE()47 CORE_END_NAMESPACE()
48
49 RENDER_BEGIN_NAMESPACE()
50
51 RenderPostProcessUpscaleNode::RenderPostProcessUpscaleNode()
52 : inputProperties_(
53 &nodeInputsData, array_view(PropertyType::DataType<RenderPostProcessUpscaleNode::NodeInputs>::properties)),
54 outputProperties_(
55 &nodeOutputsData, array_view(PropertyType::DataType<RenderPostProcessUpscaleNode::NodeOutputs>::properties))
56
57 {}
58
GetRenderInputProperties()59 IPropertyHandle* RenderPostProcessUpscaleNode::GetRenderInputProperties()
60 {
61 return inputProperties_.GetData();
62 }
63
GetRenderOutputProperties()64 IPropertyHandle* RenderPostProcessUpscaleNode::GetRenderOutputProperties()
65 {
66 return outputProperties_.GetData();
67 }
68
GetRenderDescriptorCounts() const69 DescriptorCounts RenderPostProcessUpscaleNode::GetRenderDescriptorCounts() const
70 {
71 return DescriptorCounts { {
72 { CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 32u },
73 { CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 32u },
74 { CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE, 32u },
75 { CORE_DESCRIPTOR_TYPE_SAMPLER, 24u },
76 } };
77 }
78
SetRenderAreaRequest(const RenderAreaRequest & renderAreaRequest)79 void RenderPostProcessUpscaleNode::SetRenderAreaRequest(const RenderAreaRequest& renderAreaRequest)
80 {
81 useRequestedRenderArea_ = true;
82 renderAreaRequest_ = renderAreaRequest;
83 }
84
Init(const IRenderPostProcess::Ptr & postProcess,IRenderNodeContextManager & renderNodeContextMgr)85 void RenderPostProcessUpscaleNode::Init(
86 const IRenderPostProcess::Ptr& postProcess, IRenderNodeContextManager& renderNodeContextMgr)
87 {
88 renderNodeContextMgr_ = &renderNodeContextMgr;
89 postProcess_ = postProcess;
90 renderCopyOutput_.Init(renderNodeContextMgr);
91
92 auto& gpuResourceMgr = renderNodeContextMgr_->GetGpuResourceManager();
93 samplerHandle_ = gpuResourceMgr.Create(samplerHandle_,
94 GpuSamplerDesc {
95 Filter::CORE_FILTER_LINEAR, // magFilter
96 Filter::CORE_FILTER_LINEAR, // minFilter
97 Filter::CORE_FILTER_LINEAR, // mipMapMode
98 SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeU
99 SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeV
100 SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeW
101 });
102 SetCameraData();
103
104 valid_ = true;
105 }
106
PreExecute()107 void RenderPostProcessUpscaleNode::PreExecute()
108 {
109 if (valid_ && postProcess_) {
110 const array_view<const uint8_t> propertyView = postProcess_->GetData();
111 // this node is directly dependant
112 PLUGIN_ASSERT(propertyView.size_bytes() == sizeof(RenderPostProcessUpscaleNode::EffectProperties));
113 if (propertyView.size_bytes() == sizeof(RenderPostProcessUpscaleNode::EffectProperties)) {
114 effectProperties_ = (const RenderPostProcessUpscaleNode::EffectProperties&)(*propertyView.data());
115 }
116 effectProperties_.ratio = 1.5f;
117 const GpuImageDesc& imgDesc =
118 renderNodeContextMgr_->GetGpuResourceManager().GetImageDescriptor(nodeInputsData.input.handle);
119
120 mipLevels_ = Math::max(1U, TARGET_COUNT);
121 // floor(log2(Math::max(imgDesc.width, imgDesc.height)));
122 CreateTargets(Math::UVec2(imgDesc.width, imgDesc.height));
123 if (effectProperties_.enabled) {
124 // check input and output
125 EvaluateOutput();
126 }
127 } else {
128 effectProperties_.enabled = false;
129 }
130 }
131
GetExecuteFlags() const132 IRenderNode::ExecuteFlags RenderPostProcessUpscaleNode::GetExecuteFlags() const
133 {
134 if (effectProperties_.enabled) {
135 return 0;
136 } else {
137 return IRenderNode::ExecuteFlagBits::EXECUTE_FLAG_BITS_DO_NOT_EXECUTE;
138 }
139 }
140
Execute(IRenderCommandList & cmdList)141 void RenderPostProcessUpscaleNode::Execute(IRenderCommandList& cmdList)
142 {
143 if (!valid_) {
144 return;
145 }
146 // NOTE: target counts etc. should probably be resized based on configuration
147
148 CreatePsos();
149
150 EvaluateOutput();
151 BindableImage currOutput = nodeOutputsData.output;
152 if (!RenderHandleUtil::IsValid(currOutput.handle)) {
153 return;
154 }
155
156 constexpr PushConstant pc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
157 sizeof(LocalPostProcessPushConstantStruct) };
158 // update the output
159 nodeOutputsData.output = currOutput;
160 RENDER_DEBUG_MARKER_SCOPE(cmdList, "Upscaling (LSR)");
161 {
162 RENDER_DEBUG_MARKER_COL_SCOPE(
163 cmdList, "Compute Luminance Hierarchy", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
164 ComputeLuminancePyramid(pc, cmdList);
165 }
166 {
167 RENDER_DEBUG_MARKER_COL_SCOPE(
168 cmdList, "Dilate and reconstruct", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
169 ComputeReconstructAndDilate(pc, cmdList);
170 }
171 {
172 RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "Depth clip", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
173 ComputeDepthClip(pc, cmdList);
174 }
175 {
176 constexpr PushConstant lockPassPc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
177 sizeof(LockPassPushConstant) };
178 RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "Create Locks", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
179 ComputeCreateLocks(lockPassPc, cmdList);
180 }
181 {
182 constexpr PushConstant accumulatePassPc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
183 sizeof(AccumulatePassPushConstant) };
184 RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "Accumulate", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
185 ComputeAccumulate(accumulatePassPc, cmdList);
186 }
187
188 if (targets_.rcas_enabled) {
189 RENDER_DEBUG_MARKER_COL_SCOPE(cmdList, "RCAS", DefaultDebugConstants::CORE_DEFAULT_DEBUG_COLOR);
190 ComputeRcas(pc, cmdList);
191 }
192
193 // Toggle between history and current motion vector texture
194 targets_.motionVectorIdx ^= 1;
195 targets_.historyBufferIdx ^= 1;
196 IRenderNodeCopyUtil::CopyInfo copyInfo;
197 BindableImage finalColor;
198
199 targets_.rcas_enabled ? finalColor.handle = targets_.rcas_final.GetHandle()
200 : finalColor.handle = targets_.finalColor.GetHandle();
201 copyInfo.input = finalColor;
202 copyInfo.output = nodeOutputsData.output;
203 renderCopyOutput_.Execute(cmdList, copyInfo);
204 }
205
SetCameraData()206 void RenderPostProcessUpscaleNode::SetCameraData()
207 {
208 // change this
209 // Temporary workaround to send camera data from 3D to Render
210 RenderHandle cameraUbo = renderNodeContextMgr_->GetGpuResourceManager().GetBufferHandle(
211 "RenderDataStoreDefaultSceneCORE3D_DM_CAM_DATA_BUF");
212 targets_.cameraUbo = cameraUbo;
213 }
214
ComputeLuminancePyramid(const PushConstant & pc,IRenderCommandList & cmdList)215 void RenderPostProcessUpscaleNode::ComputeLuminancePyramid(const PushConstant& pc, IRenderCommandList& cmdList)
216 {
217 // bind downscale pso
218 cmdList.BindPipeline(psos_.luminanceDownscale);
219 const ShaderThreadGroup tgs = psos_.luminanceDownscaleTGS;
220
221 if (!RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
222 return;
223 }
224 //-----------------------------------------------------------
225 // Pass #1: generate luminance texture mip 0 by sampling the original input
226 //-----------------------------------------------------------
227 {
228 auto& binder = binders_.luminanceDownscale;
229 binder->ClearBindings();
230
231 binder->BindImage(0U, { targets_.tex1[0].GetHandle() });
232 binder->BindImage(1U, { nodeInputsData.input.handle });
233 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
234
235 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
236 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
237
238 const auto targetSize = targets_.tex1Size[0];
239 LocalPostProcessPushConstantStruct uPc;
240 uPc.viewportSizeInvSize = Math::Vec4(
241 float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
242
243 cmdList.PushConstantData(pc, arrayviewU8(uPc));
244 cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
245 }
246
247 // bind hierarchy pso
248 cmdList.BindPipeline(psos_.luminancePyramid);
249 const ShaderThreadGroup PyramidTgs = psos_.luminancePyramidTGS;
250 ////-----------------------------------------------------------
251 //// Pass #2..N: generate each subsequent mip from the previous
252 ////-----------------------------------------------------------
253 for (size_t i = 1; i < mipLevels_; ++i) {
254 {
255 auto& binder = binders_.luminancePyramid[i];
256 const RenderHandle setHandle = binder->GetDescriptorSetHandle();
257 binder->ClearBindings();
258
259 binder->BindImage(0U, { targets_.tex1[i].GetHandle() });
260 binder->BindImage(1U, { targets_.tex1[i - 1].GetHandle() });
261 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
262
263 cmdList.UpdateDescriptorSet(
264 binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
265 cmdList.BindDescriptorSet(0U, setHandle);
266 }
267
268 const auto targetSize = targets_.tex1Size[i];
269
270 LocalPostProcessPushConstantStruct uPc;
271 uPc.viewportSizeInvSize = Math::Vec4(
272 float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
273 cmdList.PushConstantData(pc, arrayviewU8(uPc));
274
275 cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
276 }
277 }
278
ComputeReconstructAndDilate(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)279 void RenderPostProcessUpscaleNode::ComputeReconstructAndDilate(
280 const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
281 {
282 cmdList.BindPipeline(psos_.reconstructAndDilate);
283 const ShaderThreadGroup& tgs = psos_.reconstructAndDilateTGS;
284
285 const uint32_t motionVecIdx = targets_.motionVectorIdx;
286
287 auto& binder = binders_.reconstructAndDilate;
288 if (!binder)
289 return;
290 {
291 binder->ClearBindings();
292
293 // Bindings
294 // - 0: uPreviousDepth (R32_UINT)
295 // - 1: DilatedDepth (R16_UINT)
296 // - 2: sampler
297 // - 3: DilatedMotion (R16G16F)
298 // - 4: LockInputLuma (R16F)
299 // - 5: Depth (sampler2D)
300 // - 6: Velocity
301 // - 7: Color
302 binder->BindImage(0U, { targets_.estPrevDepth.GetHandle() });
303 binder->BindImage(1U, { targets_.dilatedDepth.GetHandle() });
304 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
305 binder->BindImage(3U, { targets_.dilatedMotionVectors[motionVecIdx].GetHandle() });
306 binder->BindImage(4U, { targets_.lockInputLuma.GetHandle() });
307 binder->BindImage(5U, { nodeInputsData.depth.handle });
308 binder->BindImage(6U, { nodeInputsData.velocity.handle });
309 binder->BindImage(7U, { nodeInputsData.input.handle });
310
311 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
312 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
313
314 const auto targetSize = targets_.renderResolution;
315 LocalPostProcessPushConstantStruct uPc;
316 uPc.viewportSizeInvSize = Math::Vec4(
317 float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
318 cmdList.PushConstantData(pc, arrayviewU8(uPc));
319
320 // Dispatch
321 cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
322 }
323 }
324
ComputeDebug(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)325 void RenderPostProcessUpscaleNode::ComputeDebug(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
326 {
327 // bind pso
328 cmdList.BindPipeline(psos_.debugPass);
329 const ShaderThreadGroup tgs = psos_.debugPassTGS;
330
331 auto& binder = binders_.debugPass;
332 if (!binder) {
333 return;
334 }
335 if (!RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
336 return;
337 }
338 {
339 binder->ClearBindings();
340
341 binder->BindImage(0U, { targets_.estPrevDepth.GetHandle() });
342 binder->BindImage(1U, { targets_.debugImage.GetHandle() });
343 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
344
345 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
346 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
347
348 const auto targetSize = targets_.renderResolution;
349
350 cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
351 }
352 }
353
ComputeDepthClip(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)354 void RenderPostProcessUpscaleNode::ComputeDepthClip(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
355 {
356 // bind pso
357 cmdList.BindPipeline(psos_.depthClipPass);
358 const ShaderThreadGroup tgs = psos_.depthClipPassTGS;
359
360 const uint32_t curMotionVecIdx = targets_.motionVectorIdx;
361 const uint32_t prevMotionVecIdx = curMotionVecIdx ^ 1;
362
363 auto& binder = binders_.depthClipPass;
364 if (!binder) {
365 return;
366 }
367 {
368 binder->ClearBindings();
369
370 // Bindings
371 // - 0: estPrevDepth (R32_UINT)
372 // - 1: DilatedDepth (R16_UINT)
373 // - 2: sampler
374 // - 3: DilatedMotion (R16G16F)
375 // - 5: Input Color (sampler2D)
376 // - 6: Depth
377 // - 7: Velocity
378 // - 7: Adjusted Color output (R16G16B16A16F)
379 // - 8: Dilated reactive masks output (R16G16F)
380 binder->BindImage(0U, { targets_.estPrevDepth.GetHandle() });
381 binder->BindImage(1U, { targets_.dilatedDepth.GetHandle() });
382 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
383 binder->BindImage(3U, { targets_.dilatedMotionVectors[curMotionVecIdx].GetHandle() });
384 binder->BindImage(4U, { targets_.dilatedMotionVectors[prevMotionVecIdx].GetHandle() });
385
386 binder->BindImage(5U, { nodeInputsData.input.handle });
387 binder->BindImage(6U, { nodeInputsData.velocity.handle });
388 binder->BindImage(7U, { targets_.adjustedColorBuffer.GetHandle() });
389 binder->BindImage(8U, { targets_.dilatedReactiveMask.GetHandle() });
390
391 // Bind camera UBO
392 binder->BindBuffer(12U, targets_.cameraUbo, 0);
393
394 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
395 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
396
397 const auto targetSize = targets_.renderResolution;
398 LocalPostProcessPushConstantStruct uPc;
399 uPc.viewportSizeInvSize = Math::Vec4(
400 float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
401 cmdList.PushConstantData(pc, arrayviewU8(uPc));
402
403 // Dispatch
404 cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
405 }
406 }
407
ComputeCreateLocks(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)408 void RenderPostProcessUpscaleNode::ComputeCreateLocks(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
409 {
410 // bind pso
411 cmdList.BindPipeline(psos_.locksPass);
412 const ShaderThreadGroup tgs = psos_.locksPassTGS;
413
414 auto& binder = binders_.locksPass;
415 if (!binder) {
416 return;
417 }
418 if (!RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
419 return;
420 }
421 {
422 binder->ClearBindings();
423
424 binder->BindImage(0U, { targets_.lockInputLuma.GetHandle() });
425 binder->BindImage(1U, { targets_.newLocksMask.GetHandle() });
426 binder->BindImage(3U, { targets_.estPrevDepth.GetHandle() });
427 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
428
429 // Bind camera UBO
430 binder->BindBuffer(12U, targets_.cameraUbo, 0);
431
432 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
433 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
434
435 const auto renderSize = targets_.renderResolution;
436 const auto displaySize = targets_.displayResolution;
437
438 LockPassPushConstant uPc;
439 uPc.renderSizeInvSize = Math::Vec4(
440 float(renderSize.x), float(renderSize.y), 1.0f / float(renderSize.x), 1.0f / float(renderSize.y));
441 uPc.displaySizeInvSize = Math::Vec4(
442 float(displaySize.x), float(displaySize.y), 1.0f / float(displaySize.x), 1.0f / float(displaySize.y));
443
444 cmdList.PushConstantData(pc, arrayviewU8(uPc));
445 cmdList.Dispatch((renderSize.x + tgs.x - 1) / tgs.x, (renderSize.y + tgs.y - 1) / tgs.y, 1);
446 }
447 }
448
ComputeAccumulate(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)449 void RenderPostProcessUpscaleNode::ComputeAccumulate(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
450 {
451 // bind pso
452 cmdList.BindPipeline(psos_.accumulatePass);
453 const ShaderThreadGroup tgs = psos_.accumulatePassTGS;
454
455 auto& binder = binders_.accumulatePass;
456 if (!binder) {
457 return;
458 }
459
460 uint32_t readHistoryIdx = targets_.historyBufferIdx ^ 1;
461 uint32_t writeHistoryIdx = targets_.historyBufferIdx;
462 {
463 binder->ClearBindings();
464 // Bindings
465 // - 0: Adjusted Color + Depth Clip (rgba16f)
466 // - 1: Dilated Reactive Mask (rg16f)
467 // - 2: Sampler
468 // - 3: New Locks Mask (r8ui)
469 // - 4: Dilated Motion Vectors (rg16f)
470
471 // Input History Textures (Previous Frame Data - Display Resolution)
472 // - 5: History Color + Reactive Alpha (rgba16f)
473 // - 6: History Lock Status (rg16f)
474 // - 7: History Luma (rgba16f)
475
476 // Output History Textures (Current Frame Data - Display Resolution)
477 // - 8: History Color + Reactive Alpha (rgba16f)
478 // - 9: History Lock Status (rg16f)
479 // - 10: History Luma (rgba16f)
480
481 // Final Output Texture
482 // - 11: Output Color (rgba16f) - Final Result [WRITE]
483 // - 13: Luminance pyramid last mip (avg exposure)
484 binder->BindImage(0U, { targets_.adjustedColorBuffer.GetHandle() });
485 binder->BindImage(1U, { targets_.dilatedReactiveMask.GetHandle() });
486 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
487
488 binder->BindImage(3U, { targets_.newLocksMask.GetHandle() });
489 binder->BindImage(4U, { targets_.dilatedMotionVectors[targets_.motionVectorIdx].GetHandle() });
490 // (Sampled Image)
491
492 // --- History Buffers (Display Resolution) ---
493 binder->BindImage(5U, { targets_.historyColorAndReactive[readHistoryIdx].GetHandle() });
494 binder->BindImage(6U, { targets_.historyLockStatus[readHistoryIdx].GetHandle() });
495 binder->BindImage(7U, { targets_.historyLuma[readHistoryIdx].GetHandle() });
496
497 // Write current frame's history (Storage Image)
498 binder->BindImage(8U, { targets_.historyColorAndReactive[writeHistoryIdx].GetHandle() });
499 binder->BindImage(9U, { targets_.historyLockStatus[writeHistoryIdx].GetHandle() });
500 binder->BindImage(10U, { targets_.historyLuma[writeHistoryIdx].GetHandle() });
501
502 binder->BindImage(11U, { targets_.finalColor.GetHandle() });
503
504 // Bind camera UBO
505 binder->BindBuffer(12U, targets_.cameraUbo, 0);
506
507 // Bind luminance pyramid texture level 4
508 binder->BindImage(13U, targets_.tex1[mipLevels_ - 1].GetHandle());
509
510 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
511 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
512
513 const auto renderSize = targets_.renderResolution;
514 const auto displaySize = targets_.displayResolution;
515 AccumulatePassPushConstant uPc;
516 uPc.displaySizeInvSize = Math::Vec4(
517 float(displaySize.x), float(displaySize.y), 1.0f / float(displaySize.x), 1.0f / float(displaySize.y));
518 uPc.viewportSizeInvSize = Math::Vec4(
519 float(renderSize.x), float(renderSize.y), 1.0f / float(renderSize.x), 1.0f / float(renderSize.y));
520
521 uPc.frameIndex = 1;
522
523 uPc.jitterSequenceLength = 16; // 16: length
524 if (uPc.jitterSequenceLength > 0) {
525 uPc.avgLanczosWeightPerFrame = 1.0f / float(uPc.jitterSequenceLength);
526 } else {
527 uPc.avgLanczosWeightPerFrame = 1.0f;
528 }
529 uPc.maxAccumulationLanczosWeight = 0.98f;
530
531 cmdList.PushConstantData(pc, arrayviewU8(uPc));
532 cmdList.Dispatch((displaySize.x + tgs.x - 1) / tgs.x, (displaySize.y + tgs.y - 1) / tgs.y, 1);
533 }
534 }
535
ComputeRcas(const PushConstant & pc,RENDER_NS::IRenderCommandList & cmdList)536 void RenderPostProcessUpscaleNode::ComputeRcas(const PushConstant& pc, RENDER_NS::IRenderCommandList& cmdList)
537 {
538 // bind pso
539 cmdList.BindPipeline(psos_.rcasPass);
540 const ShaderThreadGroup tgs = psos_.rcasPassTGS;
541
542 auto& binder = binders_.rcasPass;
543 if (!binder) {
544 return;
545 }
546 {
547 binder->ClearBindings();
548
549 binder->BindImage(0U, { targets_.finalColor.GetHandle() });
550 binder->BindImage(1U, { targets_.rcas_final.GetHandle() });
551 binder->BindSampler(2U, { samplerHandle_.GetHandle() });
552
553 cmdList.UpdateDescriptorSet(binder->GetDescriptorSetHandle(), binder->GetDescriptorSetLayoutBindingResources());
554 cmdList.BindDescriptorSet(0U, binder->GetDescriptorSetHandle());
555
556 const auto targetSize = targets_.displayResolution;
557
558 LocalPostProcessPushConstantStruct uPc;
559 uPc.viewportSizeInvSize = Math::Vec4(
560 float(targetSize.x), float(targetSize.y), 1.0f / float(targetSize.x), 1.0f / float(targetSize.y));
561 // .x = exposure, .y = pre exposure, .z = sharpness, .w = 0
562 // Sharpness : 0 = max, 1 = -1 stop, 2 = -2 stops
563 float exposure = 0.7f;
564 float preExposure = 1.0f;
565 float sharpness = 0.0f;
566 uPc.factor = Math::Vec4(exposure, preExposure, sharpness, 0.0f);
567 cmdList.PushConstantData(pc, arrayviewU8(uPc));
568
569 cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
570 }
571 }
572
CreateTargets(const BASE_NS::Math::UVec2 baseSize)573 void RenderPostProcessUpscaleNode::CreateTargets(const BASE_NS::Math::UVec2 baseSize)
574 {
575 if (baseSize.x != baseSize_.x || baseSize.y != baseSize_.y) {
576 baseSize_ = baseSize;
577 // We only store the luminance value in texture
578 ImageUsageFlags usageFlags =
579 CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT | CORE_IMAGE_USAGE_TRANSFER_SRC_BIT;
580
581 targets_.renderResolution = baseSize;
582 const uint32_t dSizeX = uint32_t(float(baseSize.x) * effectProperties_.ratio);
583 const uint32_t dSizeY = uint32_t(float(baseSize.y) * effectProperties_.ratio);
584
585 targets_.displayResolution = BASE_NS::Math::UVec2(dSizeX, dSizeY); // effectProperties_.ratio);
586
587 // create target image
588 const Math::UVec2 startTargetSize = baseSize;
589 GpuImageDesc desc {
590 ImageType::CORE_IMAGE_TYPE_2D,
591 ImageViewType::CORE_IMAGE_VIEW_TYPE_2D,
592 Format::BASE_FORMAT_R16_SFLOAT,
593 ImageTiling::CORE_IMAGE_TILING_OPTIMAL,
594 usageFlags,
595 MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
596 0,
597 EngineImageCreationFlagBits::CORE_ENGINE_IMAGE_CREATION_DYNAMIC_BARRIERS,
598 startTargetSize.x,
599 startTargetSize.y,
600 1u,
601 1u,
602 1u,
603 SampleCountFlagBits::CORE_SAMPLE_COUNT_1_BIT,
604 {},
605 };
606
607 auto& gpuResourceMgr = renderNodeContextMgr_->GetGpuResourceManager();
608 #if (RENDER_VALIDATION_ENABLED == 1)
609 const string_view nodeName = renderNodeContextMgr_->GetName();
610 #endif
611 for (size_t idx = 0; idx < targets_.tex1.size(); ++idx) {
612 // every upscale target is half the size of the original/ previous upscale target
613 desc.width /= 2U;
614 desc.height /= 2U;
615 desc.width = (desc.width >= 1U) ? desc.width : 1U;
616 desc.height = (desc.height >= 1U) ? desc.height : 1U;
617 targets_.tex1Size[idx] = Math::UVec2(desc.width, desc.height);
618 #if (RENDER_VALIDATION_ENABLED == 1)
619 const auto baseTargetName = nodeName + "_lsr_luminance_Mip" + to_string(idx);
620 targets_.tex1[idx] = gpuResourceMgr.Create(baseTargetName, desc);
621 #else
622 targets_.tex1[idx] = gpuResourceMgr.Create(desc);
623 #endif
624 }
625
626 // Dilate and reconstruct targets at render resolution
627 desc.width = baseSize.x;
628 desc.height = baseSize.y;
629 desc.usageFlags = CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT;
630
631 // Create dilated depth
632 {
633 desc.format = Format::BASE_FORMAT_R16_UINT;
634 #if (RENDER_VALIDATION_ENABLED == 1)
635 const auto baseTargetName = nodeName + "_lsr_dilated_depth";
636 targets_.dilatedDepth = gpuResourceMgr.Create(baseTargetName, desc);
637 #else
638 targets_.dilatedDepth = gpuResourceMgr.Create(desc);
639 #endif
640 }
641
642 // Create dilated motion vectors
643
644 desc.format = Format::BASE_FORMAT_R16G16_SFLOAT;
645 for (int i = 0; i < 2U; ++i) {
646 #if (RENDER_VALIDATION_ENABLED == 1)
647 const auto name = nodeName + "_lsr_dilated_motion_vector_" + ((i == 0) ? "A" : "B");
648 targets_.dilatedMotionVectors[i] = gpuResourceMgr.Create(name, desc);
649 #else
650 targets_.dilatedMotionVectors[i] = gpuResourceMgr.Create(desc);
651 #endif
652 }
653
654 // Create lock input luma
655 {
656 desc.format = Format::BASE_FORMAT_R16_SFLOAT;
657 #if (RENDER_VALIDATION_ENABLED == 1)
658 const auto baseTargetName = nodeName + "_lsr_lock_input_luma";
659 targets_.lockInputLuma = gpuResourceMgr.Create(baseTargetName, desc);
660 #else
661 targets_.lockInputLuma = gpuResourceMgr.Create(desc);
662 #endif
663 }
664
665 // Create previous depth for the current frame
666 {
667 desc.format = Format::BASE_FORMAT_R32_UINT;
668 #if (RENDER_VALIDATION_ENABLED == 1)
669 const auto baseTargetName = nodeName + "_lsr_previous_depth";
670 targets_.estPrevDepth = gpuResourceMgr.Create(baseTargetName, desc);
671 #else
672 targets_.estPrevDepth = gpuResourceMgr.Create(desc);
673 #endif
674 }
675 // Debug depth map texture
676 {
677 desc.format = Format::BASE_FORMAT_R32_SFLOAT;
678 #if (RENDER_VALIDATION_ENABLED == 1)
679 const auto baseTargetName = nodeName + "_lsr_debug_depth";
680 targets_.debugImage = gpuResourceMgr.Create(baseTargetName, desc);
681 #else
682 targets_.debugImage = gpuResourceMgr.Create(desc);
683 #endif
684 }
685 {
686 desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
687 #if (RENDER_VALIDATION_ENABLED == 1)
688 const auto baseTargetName = nodeName + "_lsr_adjusted_color";
689 targets_.adjustedColorBuffer = gpuResourceMgr.Create(baseTargetName, desc);
690 #else
691 targets_.adjustedColorBuffer = gpuResourceMgr.Create(desc);
692 #endif
693 }
694 // Dilated reactive mask texture
695 {
696 desc.format = Format::BASE_FORMAT_R16G16_SFLOAT;
697 #if (RENDER_VALIDATION_ENABLED == 1)
698 const auto baseTargetName = nodeName + "_lsr_reactive_mask";
699 targets_.dilatedReactiveMask = gpuResourceMgr.Create(baseTargetName, desc);
700 #else
701 targets_.dilatedReactiveMask = gpuResourceMgr.Create(desc);
702 #endif
703 }
704 // Create locks mask texture
705 {
706 desc.width = targets_.displayResolution.x;
707 desc.height = targets_.displayResolution.y;
708 desc.format = Format::BASE_FORMAT_R8_UNORM;
709 #if (RENDER_VALIDATION_ENABLED == 1)
710 const auto baseTargetName = nodeName + "_lsr_new_locks_mask";
711 targets_.newLocksMask = gpuResourceMgr.Create(baseTargetName, desc);
712 #else
713 targets_.newLocksMask = gpuResourceMgr.Create(desc);
714 #endif
715 }
716
717 // ---------- Create Accumulate Pass History Buffers (DISPLAY resolution) ----------
718 desc.width = targets_.displayResolution.x;
719 desc.height = targets_.displayResolution.y;
720 desc.usageFlags = CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT;
721
722 // Color History (RGBA16F)
723 desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
724 for (int i = 0; i < 2U; ++i) {
725 #if (RENDER_VALIDATION_ENABLED == 1)
726 const auto name = nodeName + "_lsr_history_color_" + ((i == 0) ? "A" : "B");
727 targets_.historyColorAndReactive[i] = gpuResourceMgr.Create(name, desc);
728 #else
729 targets_.historyColorAndReactive[i] = gpuResourceMgr.Create(desc);
730 #endif
731 }
732
733 // Lock Status History (RG16F)
734 desc.format = Format::BASE_FORMAT_R16G16_SFLOAT;
735 for (int i = 0; i < 2U; ++i) {
736 #if (RENDER_VALIDATION_ENABLED == 1)
737 const auto name = nodeName + "_lsr_history_lock_" + ((i == 0) ? "A" : "B");
738 targets_.historyLockStatus[i] = gpuResourceMgr.Create(name, desc);
739 #else
740 targets_.historyLockStatus[i] = gpuResourceMgr.Create(desc);
741 #endif
742 }
743
744 // Luma History (RGBA16F)
745 desc.format = Format::BASE_FORMAT_R8G8B8A8_UNORM;
746 for (int i = 0; i < 2U; ++i) {
747 #if (RENDER_VALIDATION_ENABLED == 1)
748 const auto name = nodeName + "_lsr_history_luma_" + ((i == 0) ? "A" : "B");
749 targets_.historyLuma[i] = gpuResourceMgr.Create(name, desc);
750 #else
751 targets_.historyLuma[i] = gpuResourceMgr.Create(desc);
752 #endif
753 }
754
755 {
756 desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
757 #if (RENDER_VALIDATION_ENABLED == 1)
758 const auto baseTargetName = nodeName + "_lsr_final_color";
759 targets_.finalColor = gpuResourceMgr.Create(baseTargetName, desc);
760 #else
761 targets_.finalColor = gpuResourceMgr.Create(desc);
762 #endif
763 }
764
765 // Optional RCAS pass
766 {
767 desc.format = Format::BASE_FORMAT_R16G16B16A16_SFLOAT;
768 #if (RENDER_VALIDATION_ENABLED == 1)
769 const auto baseTargetName = nodeName + "_lsr_rcas_out_color";
770 targets_.rcas_final = gpuResourceMgr.Create(baseTargetName, desc);
771 #else
772 targets_.rcas_final = gpuResourceMgr.Create(desc);
773 #endif
774 }
775 }
776 }
777
CreatePsos()778 void RenderPostProcessUpscaleNode::CreatePsos()
779 {
780 if (binders_.accumulatePass) {
781 return;
782 }
783
784 const auto& shaderMgr = renderNodeContextMgr_->GetShaderManager();
785 INodeContextPsoManager& psoMgr = renderNodeContextMgr_->GetPsoManager();
786 INodeContextDescriptorSetManager& dSetMgr = renderNodeContextMgr_->GetDescriptorSetManager();
787
788 // Luminance Hierarchy pass
789 {
790 const RenderHandle shader =
791 shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_luminance_downscale.shader");
792 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
793
794 psos_.luminanceDownscale = psoMgr.GetComputePsoHandle(shader, pl, {});
795 psos_.luminanceDownscaleTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
796
797 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
798 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
799 binders_.luminanceDownscale = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
800 }
801
802 // Luminance Hierarchy pass
803 {
804 const RenderHandle shader =
805 shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_luminance_pyramid.shader");
806 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
807
808 psos_.luminancePyramid = psoMgr.GetComputePsoHandle(shader, pl, {});
809 psos_.luminancePyramidTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
810
811 constexpr uint32_t localSetIdx = 0U;
812 const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
813
814 for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
815 binders_.luminancePyramid[idx] =
816 dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
817 }
818 }
819 // Dilate and reconstruct pass
820 {
821 const RenderHandle shader =
822 shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_dilate_and_reconstruct.shader");
823 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
824
825 psos_.reconstructAndDilate = psoMgr.GetComputePsoHandle(shader, pl, {});
826 psos_.reconstructAndDilateTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
827
828 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
829 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
830 binders_.reconstructAndDilate = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
831 }
832
833 // Debug depth map pass
834 {
835 const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_debug_depth.shader");
836 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
837
838 psos_.debugPass = psoMgr.GetComputePsoHandle(shader, pl, {});
839 psos_.debugPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
840
841 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
842 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
843 binders_.debugPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
844 }
845
846 // Clipping pass
847 {
848 const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_depth_clip.shader");
849 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
850
851 psos_.depthClipPass = psoMgr.GetComputePsoHandle(shader, pl, {});
852 psos_.depthClipPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
853
854 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
855 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
856 binders_.depthClipPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
857 }
858
859 // Create new locks
860 {
861 const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_lock.shader");
862 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
863
864 psos_.locksPass = psoMgr.GetComputePsoHandle(shader, pl, {});
865 psos_.locksPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
866
867 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
868 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
869 binders_.locksPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
870 }
871
872 // Accumulate pass
873 {
874 const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_accumulate.shader");
875 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
876
877 psos_.accumulatePass = psoMgr.GetComputePsoHandle(shader, pl, {});
878 psos_.accumulatePassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
879
880 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
881 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
882 binders_.accumulatePass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
883 }
884
885 // Rcas pass
886 {
887 const RenderHandle shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/lsr_rcas.shader");
888 const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
889
890 psos_.rcasPass = psoMgr.GetComputePsoHandle(shader, pl, {});
891 psos_.rcasPassTGS = shaderMgr.GetReflectionThreadGroupSize(shader);
892
893 const auto& bindings = pl.descriptorSetLayouts[0].bindings;
894 RenderHandle dsetHandle = dSetMgr.CreateDescriptorSet(bindings);
895 binders_.rcasPass = dSetMgr.CreateDescriptorSetBinder(dsetHandle, bindings);
896 }
897 }
898
EvaluateOutput()899 void RenderPostProcessUpscaleNode::EvaluateOutput()
900 {
901 if (RenderHandleUtil::IsValid(nodeInputsData.input.handle)) {
902 nodeOutputsData.output = nodeInputsData.input;
903 }
904 }
905 RENDER_END_NAMESPACE()
906