#version 460 core
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable

// Lume Super Resolution
// Accumulate pass

// includes
#include "common/bloom_common.h"
#include "render/shaders/common/render_color_conversion_common.h"
#include "render/shaders/common/render_post_process_structs_common.h"

// camera data
struct DefaultCameraMatrixStruct {
    mat4 view;
    mat4 proj;
    mat4 viewProj;

    mat4 viewInv;
    mat4 projInv;
    mat4 viewProjInv;

    mat4 viewPrevFrame;
    mat4 projPrevFrame;
    mat4 viewProjPrevFrame;

    mat4 shadowViewProj;
    mat4 shadowViewProjInv;

    // .xy = jitter offset, .zw = jitter offset with baked screen size
    vec4 jitter;
    vec4 jitterPrevFrame;

    // .xy = unique id (64-bit), .zw = layer mask (64 bit)
    uvec4 indices;
    // .x multi-view camera additional layer count, .yzw 3 multi-view camera indices
    // yzw are packed, use unpack functions
    uvec4 multiViewIndices;

    vec4 frustumPlanes[2];

    // .x environment count
    uvec4 counts;
    // padding to 256
    uvec4 pad0;
    mat4 matPad0;
    mat4 matPad1;
};

// sets
layout(set = 0, binding = 2) uniform sampler uSampler;

layout(set = 0, binding = 0) uniform texture2D adjustedColorDepthClipTex; // rgba16f
layout(set = 0, binding = 1) uniform texture2D dilatedReactiveMaskTex;    // rg16f
layout(set = 0, binding = 3, r8) uniform image2D newLocksMask;            // r8
layout(set = 0, binding = 4, rg16f) uniform image2D dilatedMotionTex;     // rg16f

layout(set = 0, binding = 5) uniform texture2D historyColorReactiveTex; // rgba16f
layout(set = 0, binding = 6) uniform texture2D historyLockStatusTex;    // rg16f
layout(set = 0, binding = 7, rgba8) uniform image2D historyLumaTex;     // rgba8

layout(set = 0, binding = 8, rgba16f) uniform writeonly image2D out_HistoryColorReactive;
layout(set = 0, binding = 9, rg16f) uniform writeonly image2D out_HistoryLockStatus;
layout(set = 0, binding = 10, rgba8) uniform writeonly image2D out_HistoryLuma;

layout(set = 0, binding = 11, rgba16f) uniform writeonly image2D out_FinalColor;
layout(set = 0, binding = 13, r16f) uniform image2D luminanceTex; // r16f

// Push constants
struct AccumulatePassPushConstant {
    vec4 displaySizeInvSize;  // .xy = display size, .zw = 1 / display size (see accessors below)
    vec4 viewportSizeInvSize; // .xy = render size, .zw = 1 / render size (see accessors below)
    float exposure;
    uint frameIndex;
    uint jitterSequenceLength;
    float avgLanczosWeightPerFrame;
    float maxAccumulationLanczosWeight;
};

// Everything the accumulate pass produces for one display-resolution pixel.
struct AccumulateOutputs {
    vec4 fColorAndWeight; // written to out_HistoryColorReactive
    vec2 fLockStatus;     // written to out_HistoryLockStatus
    vec4 fLumaHistory;    // written to out_HistoryLuma
    vec3 fColor;          // written to out_FinalColor
};

layout(push_constant, std430) uniform uPushConstantBlock
{
    AccumulatePassPushConstant uPc;
};

layout(set = 0, binding = 12, std140) uniform uCameraMatrices
{
    DefaultCameraMatrixStruct uCameras[16];
};

// --- Constants ---
const float LSR_EPSILON = 1e-05f;
const float LSR_FLT_MAX = 3.402823466e+38f;
const int iLanczos2SampleCount = 9;
const float fUpsampleLanczosWeightScale = 1.0f / 9.0f;

// Component indices into the 4-entry luma history vector.
const int LUMA_N_MINUS_1 = 0;
const int LUMA_N_MINUS_2 = 1;
const int LUMA_N_MINUS_3 = 2;
const int LUMA_N_MINUS_4 = 3;

// Component indices into the 2-entry lock status vector.
const int LSR_LOCK_LIFETIME_REMAINING = 0;
const int LSR_LOCK_TEMPORAL_LUMA = 1;

// Luma instability variables
const float kUnorm8Step = 1.0 / 255.0;
const float kInvUnorm8Step = 255.0;

//---------------- Utility & Helper Functions ------------------//

// Display (output) resolution in pixels, truncated to integers.
ivec2 DisplaySize()
{
    return ivec2(uPc.displaySizeInvSize.xy);
}
// Display resolution as pushed (float).
vec2 DisplaySizeVec2()
{
    return uPc.displaySizeInvSize.xy;
}
// Reciprocal of the display resolution.
vec2 InvDisplaySize()
{
    return uPc.displaySizeInvSize.zw;
}
// Render (input) resolution in pixels, truncated to integers.
ivec2 RenderSize()
{
    return ivec2(uPc.viewportSizeInvSize.xy);
}
// Render resolution as pushed (float).
vec2 RenderSizeVec2()
{
    return uPc.viewportSizeInvSize.xy;
}
// Reciprocal of the render resolution.
vec2 InvRenderSize()
{
    return uPc.viewportSizeInvSize.zw;
}
// display size / render size
vec2 UpscaleFactor()
{
    return DisplaySizeVec2() * InvRenderSize();
}
// render size / display size
vec2 DownscaleFactor()
{
    return uPc.viewportSizeInvSize.xy * uPc.displaySizeInvSize.zw;
}

// NOTE(review): the accessors below return fixed values and ignore the matching
// push-constant members (uPc.exposure, uPc.frameIndex, uPc.jitterSequenceLength,
// uPc.avgLanczosWeightPerFrame, uPc.maxAccumulationLanczosWeight). Kept as-is because the
// host-side contents of those members are not visible here - confirm whether this is intended.
float Exposure()
{
    return 0.7f;
}
float PreviousFramePreExposure()
{
    return 1.0f;
}
// NOTE(review): constant 1 means the "reset frame" test (0 == FrameIndex()) can never be true.
uint FrameIndex()
{
    return 1;
}
// Jitter of camera 0 (.xy of the camera jitter vector).
vec2 JitterPixels()
{
    return uCameras[0].jitter.xy;
}
// Jitter of camera 0 with baked screen size (.zw of the camera jitter vector).
vec2 JitterUV()
{
    return uCameras[0].jitter.zw;
}
// vec2 JitterUV() { return vec2(0); }
// vec2 JitterPrevUV() { return vec2(0); }

uint JitterSequenceLength()
{
    return 16;
}
float AverageLanczosWeightPerFrame()
{
    return 1.0 / 9.0;
}
float MaxAccumulationLanczosWeight()
{
    return 0.95;
}

// Clamp to [0, 1], component-wise.
float saturate(float x)
{
    return clamp(x, 0.0, 1.0);
}
vec2 saturate(vec2 x)
{
    return clamp(x, 0.0, 1.0);
}

vec3 saturate(vec3 x)
{
    return clamp(x, 0.0, 1.0);
}

// True when both components of uv lie in [0, 1] (inclusive).
bool IsUvInside(vec2 uv)
{
    return all(greaterThanEqual(uv, vec2(0.0))) && all(lessThanEqual(uv, vec2(1.0)));
}

// Clamp both components of uv to [0, 1].
vec2 ClampUv(vec2 uv)
{
    return clamp(uv, vec2(0.0), vec2(1.0));
}

// rgb / (1 + max(0, r, g, b))
vec3 Tonemap(vec3 rgb)
{
    return rgb / (max(max(0.f, rgb.r), max(rgb.g, rgb.b)) + 1.f).xxx;
}

// rgb / max(limit, 1 - max(r, g, b))
// NOTE(review): the literal 1e-0001 evaluates to 0.1 (not 1e-4), which caps the gain at 10x.
// Left unchanged because the intended limit cannot be determined from this file - confirm.
vec3 InverseTonemap(vec3 rgb)
{
    return rgb / max(1e-0001, 1.f - max(rgb.r, max(rgb.g, rgb.b))).xxx;
}

// --- Sampling Functions ---

// 3x3 tap layout of the Catmull-Rom filter.
// Pairing of coordinates and weights as produced by Get2DCatmullRom9Kernel:
//   UV[0] <-> Weight[0], UV[2] <-> Weight[1], UV[1] <-> Weight[2]
struct CatmullRomSamples9Tap {
    vec2 UV[3];
    vec2 Weight[3];
    float FinalMultiplier;
};

// Builds the 9-tap Catmull-Rom sampling positions and weights for `uv`.
// `size` is the texture size in texels and `invSize` its reciprocal.
CatmullRomSamples9Tap Get2DCatmullRom9Kernel(vec2 uv, vec2 size, vec2 invSize)
{
    CatmullRomSamples9Tap catmullSamples;
    // Not read anywhere in this file; set so the returned struct has no undefined member.
    catmullSamples.FinalMultiplier = 1.0f;

    vec2 samplePos = uv * size;
    vec2 texPos1 = floor(samplePos - 0.5f) + 0.5f;
    vec2 f = samplePos - texPos1;

    vec2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f));
    vec2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f);
    vec2 w2 = f * (0.5f + f * (2.0f - 1.5f * f));
    vec2 w3 = f * f * (-0.5f + 0.5f * f);

    catmullSamples.Weight[0] = w0;
    catmullSamples.Weight[1] = w1 + w2;
    catmullSamples.Weight[2] = w3;

    // The two middle taps are merged into one fetch placed between them.
    vec2 offset12 = w2 / (w1 + w2);

    // Compute the final UV coordinates we'll use for sampling the texture
    catmullSamples.UV[0] = texPos1 - 1.0;
    catmullSamples.UV[1] = texPos1 + 2.0;
    catmullSamples.UV[2] = texPos1 + offset12;

    catmullSamples.UV[0] *= invSize;
    catmullSamples.UV[1] *= invSize;
    catmullSamples.UV[2] *= invSize;
    return catmullSamples;
}

// 1D Lanczos 2 Kernel function
float Lanczos2(float x)
{
    x = abs(x);
    if (x < LSR_EPSILON)
        return 1.0;
    if (x >= 2.0)
        return 0.0;

    float PI = 3.141592653589793;
    float pix = PI * x;
    float pix2 = PI * x * 0.5;

    float s1 = sin(pix) / max(pix, LSR_EPSILON);
    float s2 = sin(pix2) / max(pix2, LSR_EPSILON);

    return s1 * s2;
}

// Samples historyColorReactiveTex with the 9-tap Catmull-Rom kernel at `fUvSample` and
// clamps the result to the min/max of the four taps fColor00, fColor20, fColor02, fColor22.
vec4 SampleHistoryColorKernel(vec2 fUvSample)
{
    CatmullRomSamples9Tap samples = Get2DCatmullRom9Kernel(fUvSample, DisplaySize(), InvDisplaySize());

    vec4 fColor = vec4(0.0f);

    // Row using UV[0].y / Weight[0].y
    vec4 fColor00 = texture(sampler2D(historyColorReactiveTex, uSampler), samples.UV[0]);
    fColor += fColor00 * samples.Weight[0].x * samples.Weight[0].y;
    vec4 fColor20 = texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[2].x, samples.UV[0].y));
    fColor += fColor20 * samples.Weight[1].x * samples.Weight[0].y;
    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[1].x, samples.UV[0].y)) *
              samples.Weight[2].x * samples.Weight[0].y;

    // Row using UV[2].y / Weight[1].y
    // The tap is fetched once and reused for both the sum and the deringing set.
    vec4 fColor02 = texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[0].x, samples.UV[2].y));
    fColor += fColor02 * samples.Weight[0].x * samples.Weight[1].y;
    vec4 fColor22 = texture(sampler2D(historyColorReactiveTex, uSampler), samples.UV[2]);
    fColor += fColor22 * samples.Weight[1].x * samples.Weight[1].y;
    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[1].x, samples.UV[2].y)) *
              samples.Weight[2].x * samples.Weight[1].y;

    // Row using UV[1].y / Weight[2].y
    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[0].x, samples.UV[1].y)) *
              samples.Weight[0].x * samples.Weight[2].y;
    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[2].x, samples.UV[1].y)) *
              samples.Weight[1].x * samples.Weight[2].y;
    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), samples.UV[1]) * samples.Weight[2].x *
              samples.Weight[2].y;

    // Deringing: keep the filtered value inside the range of the four taps listed below.
    const vec4 fDeringingSamples[4] = { fColor00, fColor20, fColor02, fColor22 };

    vec4 fDeringingMin = fDeringingSamples[0];
    vec4 fDeringingMax = fDeringingSamples[0];

    for (int iSampleIndex = 1; iSampleIndex < 4; ++iSampleIndex) {
        fDeringingMin = min(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = max(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }
    fColor = clamp(fColor, fDeringingMin, fDeringingMax);

    return fColor;
}

// Samples adjustedColorDepthClipTex at the given uv.
vec4 SampleAdjustedColorDepthClip(vec2 uv_render_jittered)
{
    return texture(sampler2D(adjustedColorDepthClipTex, uSampler), uv_render_jittered);
}
// Samples the .rg channels of dilatedReactiveMaskTex; returns 0 for uv outside [0, 1].
vec2 SampleDilatedReactiveMasks(vec2 uv_render_jittered)
{
    if (!IsUvInside(uv_render_jittered)) {
        return vec2(0.0);
    }
    return texture(sampler2D(dilatedReactiveMaskTex, uSampler), uv_render_jittered).rg;
}

// Loads the .r channel of luminanceTex at an integer coordinate.
float LoadLuminance(ivec2 render_coord)
{
    return imageLoad(luminanceTex, render_coord).r;
}

// Loads the .r channel of newLocksMask at an integer coordinate.
float LoadNewLockRequest(ivec2 coord_display)
{
    return imageLoad(newLocksMask, coord_display).r;
}
// Loads the .rg channels of dilatedMotionTex at the texel addressed by `uv_render`.
vec2 LoadMotionVector(vec2 uv_render)
{
    const ivec2 renderSize = RenderSize();
    ivec2 coord = ivec2(uv_render * vec2(renderSize));
    // uv == 1.0 would map to renderSize, one texel past the last valid index.
    coord = clamp(coord, ivec2(0), max(renderSize - ivec2(1), ivec2(0)));
    return imageLoad(dilatedMotionTex, coord).rg;
}
// Samples the .rg channels of historyLockStatusTex at the given uv.
vec2 SampleHistoryLockStatus(vec2 uv_display)
{
    return texture(sampler2D(historyLockStatusTex, uSampler), uv_display).rg;
}
// Loads the luma history texel that contains `uv_display`.
// A pixel-centre uv of (p + 0.5) / size maps to texel p; the coordinate is clamped so a
// uv of exactly 1.0 still addresses the last texel.
vec4 SampleHistoryLuma(vec2 uv_display)
{
    const ivec2 displaySize = DisplaySize();
    ivec2 pixel = ivec2(floor(uv_display * vec2(displaySize)));
    pixel = clamp(pixel, ivec2(0), max(displaySize - ivec2(1), ivec2(0)));
    return imageLoad(historyLumaTex, pixel);
}

// --- Data Structs ---
struct LockState {
    bool newLock;
    bool wasLockedPrevFrame;
};

struct RectificationBox {
    vec3 boxCenter;         // Weighted average color (Linear RGB)
    vec3 boxVec;            // Standard deviation (Linear RGB)
    vec3 aabbMin;           // Min bounds (Linear RGB)
    vec3 aabbMax;           // Max bounds (Linear RGB)
    float fBoxCenterWeight; // Total weight used for averaging
    // Add sum of squares for proper variance calculation
    // (not read by any of the RectificationBox* helpers below)
    vec3 weightedColorSum;
    vec3 weightedColorSqSum;
};

struct AccumulationPassCommonParams {
    ivec2 iPxHrPos;               // Integer coordinate at Display Res
    vec2 fHrUv;                   // Float UV at Display Res (pixel center)
    vec2 fLrUv_Sample;            // Float UV for sampling Render Res textures (with jitter)
    vec2 fMotionVector;
    float fHrVelocity;            // Pixel space velocity magnitude (Display Res)
    vec2 fReprojectedHrUv;        // UV in previous frame's Display Res space
    bool bIsExistingSample;       // Was fReprojectedHrUv on screen?
    bool bIsNewSample;            // Should discard history? (First frame or !bIsExistingSample)
    float fDilatedReactiveFactor; // Reactive mask X value
    float fAccumulationMask;      // Reactive mask Y value (Composition/Motion)
    float fDepthClipFactor;       // Depth clip value
};

// Polynomial approximation of the Lanczos-2 kernel, taking the squared distance `x2`.
// The input is not clamped.
float Lanczos2ApproxSqNoClamp(float x2)
{
    float a = (2.0f / 5.0f) * x2 - 1;
    float b = (1.0f / 4.0f) * x2 - 1;
    return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b);
}

// Same approximation with the squared distance limited to 4 (distance 2), where the
// (b * b) factor of the polynomial is zero.
float Lanczos2ApproxSq(float x2)
{
    x2 = min(x2, 4.0f);
    return Lanczos2ApproxSqNoClamp(x2);
}

// Puts the box into its empty state: zero sums and weight, inverted AABB.
void RectificationBoxReset(inout RectificationBox rectificationBox)
{
    rectificationBox.fBoxCenterWeight = 0.0f;
    rectificationBox.boxCenter = vec3(0.0);
    rectificationBox.boxVec = vec3(0.0);
    rectificationBox.aabbMin = vec3(LSR_FLT_MAX);
    rectificationBox.aabbMax = vec3(-LSR_FLT_MAX);
    // Zeroed so a reset box carries no undefined members.
    rectificationBox.weightedColorSum = vec3(0.0);
    rectificationBox.weightedColorSqSum = vec3(0.0);
}

// Seeds the box from its first sample: the AABB collapses onto the sample and the running
// sums are overwritten (not accumulated).
void RectificationBoxAddInitialSample(
    inout RectificationBox rectificationBox, const vec3 colorSample, const float fSampleWeight)
{
    rectificationBox.aabbMin = colorSample;
    rectificationBox.aabbMax = colorSample;

    vec3 weightedSample = colorSample * fSampleWeight;
    rectificationBox.boxCenter = weightedSample;            // running sum of w * c
    rectificationBox.boxVec = colorSample * weightedSample; // running sum of w * c * c
    rectificationBox.fBoxCenterWeight = fSampleWeight;
}

// Adds one weighted sample to the box. When `bInitialSample` is true the box is seeded from
// the sample instead of being extended.
void RectificationBoxAddSample(
    bool bInitialSample, inout RectificationBox rectificationBox, const vec3 colorSample, const float fSampleWeight)
{
    if (bInitialSample) {
        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
        return;
    }

    rectificationBox.aabbMin = min(rectificationBox.aabbMin, colorSample);
    rectificationBox.aabbMax = max(rectificationBox.aabbMax, colorSample);

    vec3 weightedSample = colorSample * fSampleWeight;
    rectificationBox.boxCenter += weightedSample;
    rectificationBox.boxVec += colorSample * weightedSample;
    rectificationBox.fBoxCenterWeight += fSampleWeight;
}

// Converts the accumulated sums of the box into mean (boxCenter) and standard deviation
// (boxVec). A total weight with magnitude <= LSR_EPSILON is replaced by 1 to avoid dividing
// by (almost) zero.
void RectificationBoxComputeVarianceBoxData(inout RectificationBox rectificationBox)
{
    rectificationBox.fBoxCenterWeight =
        (abs(rectificationBox.fBoxCenterWeight) > float(LSR_EPSILON) ? rectificationBox.fBoxCenterWeight : 1.0);
    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
    rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;

    // sqrt(|E[c^2] - E[c]^2|); abs() guards against a slightly negative difference.
    vec3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
    rectificationBox.boxVec = stdDev;
}

// --- Reprojection Related ---

// Loads the motion vector for the display-resolution uv `fHrUv`.
// `iPxHrPos` is unused; it is kept so existing call sites stay valid.
vec2 GetMotionVector(ivec2 iPxHrPos, vec2 fHrUv)
{
    // The uv is clamped to [0, 1], so no separate inside-test is needed afterwards.
    const vec2 uv_render = ClampUv(fHrUv); // * DownscaleFactor();
    return LoadMotionVector(uv_render);
}

// Length of the motion vector in display pixels. Motion shorter than 0.01 in uv units is
// reported as zero velocity.
// NOTE(review): 0.01 is compared against the uv-space length, not pixels - confirm intent.
float GetPxHrVelocity(vec2 motionVector)
{
    if (length(motionVector) < 0.01) {
        return 0.0;
    }
    return length(motionVector * DisplaySizeVec2());
}

// Reprojects `fHrUv` by subtracting the motion vector and reports whether the result is
// still inside [0, 1].
void ComputeReprojectedUVs(vec2 fHrUv, vec2 motionVector, out vec2 fReprojectedHrUv, out bool bIsExistingSample)
{
    fReprojectedHrUv = fHrUv - motionVector;
    bIsExistingSample = IsUvInside(fReprojectedHrUv);
}

// Fetches the history color at the reprojected uv. The history alpha channel carries the
// temporal reactive factor in its magnitude and the "in motion last frame" flag in its sign
// (negative = in motion).
void ReprojectHistoryColor(
    vec2 fReprojectedHrUv, out vec3 historyColorYCoCg, out float temporalReactiveFactor, out bool bInMotionLastFrame)
{
    vec4 historySample = SampleHistoryColorKernel(fReprojectedHrUv);

    float alpha = historySample.a;
    bInMotionLastFrame = (alpha < 0.0);
    temporalReactiveFactor = abs(alpha);

    historyColorYCoCg = historySample.rgb;
}

// Reads the new-lock request for this pixel and the reprojected lock status of the
// previous frame.
LockState ReprojectHistoryLockStatus(vec2 fReprojectedHrUv, ivec2 iPxHrPos, out vec2 reprojectedLockStatus)
{
    LockState state;

    float newLockRequest = LoadNewLockRequest(iPxHrPos);
    state.newLock = (newLockRequest > (127.0 / 255.0));

    reprojectedLockStatus = SampleHistoryLockStatus(fReprojectedHrUv);

    // Check if previous frame was locked based on Lifetime Remaining
    state.wasLockedPrevFrame = (reprojectedLockStatus[LSR_LOCK_LIFETIME_REMAINING] > 0.0f);

    return state;
}

// Fills the per-pixel parameters that depend only on position and motion. The mask, depth
// clip and new-sample fields are set to defaults here and filled in by the Init* helpers.
AccumulationPassCommonParams InitParams(ivec2 iPxHrPos)
{
    AccumulationPassCommonParams params;
    params.iPxHrPos = iPxHrPos;
    params.fHrUv = (vec2(iPxHrPos) + 0.5) * InvDisplaySize();
    vec2 jitterUV = JitterUV();

    vec2 renderUvBase = params.fHrUv;
    params.fLrUv_Sample = renderUvBase + jitterUV;
    params.fLrUv_Sample = clamp(params.fLrUv_Sample, vec2(0), vec2(1));

    vec2 sampledMV = GetMotionVector(iPxHrPos, params.fHrUv); // Samples render-res MV texture
    params.fMotionVector = sampledMV / DownscaleFactor();
    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
    ComputeReprojectedUVs(params.fHrUv, params.fMotionVector, params.fReprojectedHrUv, params.bIsExistingSample);
    // Init factors
    params.fDilatedReactiveFactor = 0.0;
    params.fAccumulationMask = 0.0;
    params.fDepthClipFactor = 0.0;
    params.bIsNewSample = false;
    return params;
}

// Reads the dilated reactive masks: .x -> fDilatedReactiveFactor, .y -> fAccumulationMask.
void InitReactiveMaskFactors(inout AccumulationPassCommonParams params)
{
    vec2 dilatedMasks = SampleDilatedReactiveMasks(params.fLrUv_Sample);
    params.fDilatedReactiveFactor = dilatedMasks.x;
    params.fAccumulationMask = dilatedMasks.y;
}
// Reads the depth clip factor from the alpha channel of the adjusted color texture.
void InitDepthClipFactors(inout AccumulationPassCommonParams params)
{
    params.fDepthClipFactor = saturate(SampleAdjustedColorDepthClip(params.fLrUv_Sample).a);
}
// A sample is "new" when its reprojection fell off screen or the frame index is 0.
void initIsNewSample(inout AccumulationPassCommonParams params)
{
    bool bIsResetFrame = (0 == FrameIndex());
    params.bIsNewSample = (!params.bIsExistingSample || bIsResetFrame);
}

// --- Lock Status Update ---

// Ends a lock by zeroing its remaining lifetime.
void KillLock(inout vec2 lockStatus)
{
    lockStatus[LSR_LOCK_LIFETIME_REMAINING] = 0;
}
// Lock status of a pixel without history: both components zero.
void InitializeNewLockSample(out vec2 lockStatus)
{
    lockStatus = vec2(0.0);
}

// min(a, b) / max(a, b), with the denominator kept at or above LSR_EPSILON.
float MinDividedByMax(float a, float b)
{
    return min(a, b) / max(max(a, b), LSR_EPSILON);
}

// Loads the luminance at `fUvCoord`, with the texel coordinate clamped to the image.
// `iPxHrPos` is unused.
float GetShadingChangeLuma(ivec2 iPxHrPos, vec2 fUvCoord)
{
    ivec2 size = imageSize(luminanceTex);
    ivec2 coord = clamp(ivec2(fUvCoord * size), ivec2(0), size - ivec2(1));
    return LoadLuminance(coord);
}

// Updates the lock status of the pixel for this frame.
//   fReactiveFactor            in/out: raised when the luminance changed noticeably
//   lockState                  new-lock request / previous lock flags
//   fLockStatus                in/out: [LIFETIME_REMAINING, TEMPORAL_LUMA]
//   fLockContributionThisFrame out: how strongly the lock protects history, in [0, 1]
//   fLuminanceDiff             out: 1 - min/max ratio of stored and current luminance
void UpdateLockStatus(const AccumulationPassCommonParams params, inout float fReactiveFactor, LockState lockState,
    inout vec2 fLockStatus, out float fLockContributionThisFrame, out float fLuminanceDiff)
{
    float fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);

    // A stored luma of exactly 0 is treated as "not set yet" and replaced by the current one.
    fLockStatus[LSR_LOCK_TEMPORAL_LUMA] =
        (fLockStatus[LSR_LOCK_TEMPORAL_LUMA] == 0.0) ? fShadingChangeLuma : fLockStatus[LSR_LOCK_TEMPORAL_LUMA];

    float fPrevLuma = fLockStatus[LSR_LOCK_TEMPORAL_LUMA];

    fLuminanceDiff = 1.0 - MinDividedByMax(fPrevLuma, fShadingChangeLuma);

    if (lockState.newLock) {
        fLockStatus[LSR_LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
        // Refreshing an existing lock yields 2, a brand new lock yields 1.
        fLockStatus[LSR_LOCK_LIFETIME_REMAINING] = (fLockStatus[LSR_LOCK_LIFETIME_REMAINING] != 0.0) ? 2.0 : 1.0;
    } else if (fLockStatus[LSR_LOCK_LIFETIME_REMAINING] <= 1.0) {
        fLockStatus[LSR_LOCK_TEMPORAL_LUMA] = mix(fLockStatus[LSR_LOCK_TEMPORAL_LUMA], fShadingChangeLuma, 0.5);
    } else {
        if (fLuminanceDiff > 0.2)
            KillLock(fLockStatus);
    }

    fReactiveFactor = max(fReactiveFactor, saturate((fLuminanceDiff - 0.1) * 10.0));

    // Reactive pixels, masked pixels and depth-clipped pixels lose their lock lifetime.
    fLockStatus[LSR_LOCK_LIFETIME_REMAINING] *= (1.0 - fReactiveFactor);
    fLockStatus[LSR_LOCK_LIFETIME_REMAINING] *= saturate(1.0 - params.fAccumulationMask);
    fLockStatus[LSR_LOCK_LIFETIME_REMAINING] *= float(params.fDepthClipFactor < 0.1);

    float lifetimeC = saturate(fLockStatus[LSR_LOCK_LIFETIME_REMAINING] - 1.0);
    float shadingC = saturate(MinDividedByMax(fLockStatus[LSR_LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
    fLockContributionThisFrame = saturate(saturate(lifetimeC * 4.0) * shadingC);
}

// --- Luma Instability ---

// Returns the luma instability factor of the pixel and stores the updated 4-entry luma
// history in `result.fLumaHistory`.
// History layout follows the LUMA_N_MINUS_k indices: component 0 holds the most recent
// value, component 3 the oldest one.
float ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox,
    float thisFrameReactiveFactor, float luminanceDiff, inout AccumulateOutputs result)
{
    // NOTE(review): uses the first component of the box center as "luma" - confirm that this
    // matches the color space the box was built in.
    float currentLuma = saturate(clippingBox.boxCenter.x);
    // Quantize to the 8-bit steps of the history storage format.
    currentLuma = floor(currentLuma * kInvUnorm8Step + 0.5) * kUnorm8Step;

    const bool sampleHistory =
        (max(max(params.fDepthClipFactor, params.fAccumulationMask), luminanceDiff) < 0.1) && (!params.bIsNewSample);

    vec4 history = sampleHistory ? SampleHistoryLuma(params.fReprojectedHrUv) : vec4(0.0);

    float instability = 0.0;

    const float d0 = currentLuma - history[LUMA_N_MINUS_1];
    float minDiff = abs(d0);

    if (minDiff >= kUnorm8Step) {
        // Look for an older entry that differs in the same direction but by a smaller amount.
        for (int i = LUMA_N_MINUS_2; i <= LUMA_N_MINUS_4; ++i) {
            const float di = currentLuma - history[i];
            if (sign(d0) == sign(di)) {
                minDiff = min(minDiff, abs(di));
            }
        }

        const float boxFactor = pow(saturate(clippingBox.boxVec.x / 0.1), 6.0);

        instability = float(minDiff != abs(d0)) * boxFactor;
        instability = float(instability > kUnorm8Step);

        instability *= 1.0 - max(params.fAccumulationMask, pow(thisFrameReactiveFactor, 1.0 / 6.0));
    }

    // Shift the history by one frame: every entry moves to the next older slot and the
    // current luma becomes LUMA_N_MINUS_1.
    history = vec4(currentLuma, history.rgb);
    result.fLumaHistory = history;

    // Only report instability once the oldest slot has been populated.
    return instability * float(history[LUMA_N_MINUS_4] != 0.0);
}

// Decreases the remaining lock lifetime by the share of upsample weight received this
// frame, or kills the lock when the estimated position for the next frame is off screen.
// The result is stored in `result.fLockStatus`.
void FinalizeLockStatus(
    const AccumulationPassCommonParams params, vec2 fLockStatus, float fUpsampledWeight, inout AccumulateOutputs result)
{
    vec2 hrMotionVectorUV = params.fMotionVector;
    vec2 fEstimatedUvNextFrame = params.fHrUv + hrMotionVectorUV;

    if (!IsUvInside(fEstimatedUvNextFrame)) {
        KillLock(fLockStatus);
    } else {
        const float fAverageWeightPerFrame = (1.0 / 9.0) * 0.74;

        float fLifetimeDecreaseMaxTotalWeight =
            max(LSR_EPSILON, float(JitterSequenceLength()) * fAverageWeightPerFrame);

        float fLifetimeDecrease = (fUpsampledWeight / fLifetimeDecreaseMaxTotalWeight);

        fLockStatus[LSR_LOCK_LIFETIME_REMAINING] =
            max(0.0f, fLockStatus[LSR_LOCK_LIFETIME_REMAINING] - fLifetimeDecrease);
    }

    result.fLockStatus = fLockStatus;
}

// --- Accumulation Weight --
//-------------------------------------------------------------------------------------------------
// Upsample Functions (Adapted from ARM ASR)
//-------------------------------------------------------------------------------------------------

// Clamps `fColor` to the axis-aligned bounds of the clipping box.
void Deringing(RectificationBox clippingBox, inout vec3 fColor)
{
    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}

// Approximated Lanczos-2 weight for a sample offset (in source pixels) scaled by the
// kernel weight.
float GetUpsampleLanczosWeight(vec2 fSrcSampleOffset, float fKernelWeight)
{
    vec2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight;
    return Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
}

// Kernel weight derived from the horizontal upscale ratio, limited to 1.99.
// A downscale factor component at or below LSR_EPSILON falls back to a ratio of 1.
float ComputeMaxKernelWeight()
{
    const float fKernelSizeBias = 1.0f;
    vec2 dsFactor = DownscaleFactor();
    vec2 invDsFactor = (dsFactor.x > LSR_EPSILON && dsFactor.y > LSR_EPSILON) ? (vec2(1.0) / dsFactor) : vec2(1.0);
    float fKernelWeight = 1.0f + (invDsFactor.x - 1.0f) * fKernelSizeBias;
    return min(1.99f, fKernelWeight);
}

// Base accumulation weight of the history. It is zero for samples without history and is
// reduced by the reactive factor, depth clipping and motion. `lockState` is unused.
vec3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, float fThisFrameReactiveFactor,
    bool bInMotionLastFrame, float fUpsampledWeight, LockState lockState)
{
    float fBaseAccumulation = MaxAccumulationLanczosWeight() * float(params.bIsExistingSample) *
                              (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);

    float motionFactor1 = max(float(bInMotionLastFrame), saturate(params.fHrVelocity * 10.0f));
    fBaseAccumulation = min(fBaseAccumulation, mix(fBaseAccumulation, fUpsampledWeight * 10.0f, motionFactor1));

    float motionFactor2 = saturate(params.fHrVelocity / 20.0f);
    fBaseAccumulation = min(fBaseAccumulation, mix(fBaseAccumulation, fUpsampledWeight, motionFactor2));

    return vec3(fBaseAccumulation);
}

// Upsamples the current frame at the display pixel from a 3x3 render-resolution
// neighbourhood. Returns the color in .rgb and the scaled sum of the Lanczos weights in .a,
// and fills `clippingBox` with the statistics of the same neighbourhood.
vec4 ComputeUpsampledColorAndWeight(
    const AccumulationPassCommonParams params, inout RectificationBox clippingBox, float fReactiveFactor)
{
    vec2 fDstOutputPos = vec2(params.iPxHrPos) + vec2(0.5);
    vec2 fSrcOutputPos = fDstOutputPos * DownscaleFactor();
    ivec2 iSrcInputPos = ivec2(floor(fSrcOutputPos));
    vec2 fSrcUnjitteredPos = vec2(iSrcInputPos) + vec2(0.5);
    vec2 fBaseSampleOffset = fSrcUnjitteredPos - fSrcOutputPos;

    float fKernelReactiveFactor = max(fReactiveFactor, float(params.bIsNewSample));
    float fKernelBiasMax = ComputeMaxKernelWeight() * (1.0 - fKernelReactiveFactor);
    float fKernelBiasMin = max(1.0, (1.0 + fKernelBiasMax) * 0.3);
    float fKernelBiasFactor = max(0.0, max(0.25 * params.fDepthClipFactor, fKernelReactiveFactor));
    float fKernelBias = mix(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);

    float fRectificationCurveBias = mix(-2.0, -3.0, clamp(params.fHrVelocity / 50.0, 0.0, 1.0));

    vec4 fColorAndWeight = vec4(0.0);

    RectificationBoxReset(clippingBox);

    const ivec2 sampleOffsets[iLanczos2SampleCount] = { ivec2(-1, -1), ivec2(0, -1), ivec2(1, -1), ivec2(-1, 0),
        ivec2(0, 0), ivec2(1, 0), ivec2(-1, 1), ivec2(0, 1), ivec2(1, 1) };

    // Each tap is fetched, weighted and added to the clipping box in a single iteration.
    for (int idx = 0; idx < iLanczos2SampleCount; ++idx) {
        // Fetch at the unjittered texel centre of the neighbour.
        const ivec2 neighborCoord = iSrcInputPos + sampleOffsets[idx];
        const vec2 neighborUv = (vec2(neighborCoord) + 0.5) * InvRenderSize();
        const vec3 sampleRgb =
            InverseTonemap(yCoCgToRgb(SampleAdjustedColorDepthClip(ClampUv(neighborUv)).rgb));

        const vec2 fSrcSampleOffsetPixels = fBaseSampleOffset + vec2(sampleOffsets[idx]);
        const float fSampleWeight = GetUpsampleLanczosWeight(fSrcSampleOffsetPixels, fKernelBias);
        fColorAndWeight.rgb += sampleRgb * fSampleWeight;
        fColorAndWeight.a += fSampleWeight;

        const float fBoxSampleWeight =
            exp(fRectificationCurveBias * dot(fSrcSampleOffsetPixels, fSrcSampleOffsetPixels));
        RectificationBoxAddSample(idx == 0, clippingBox, sampleRgb, fBoxSampleWeight);
    }

    RectificationBoxComputeVarianceBoxData(clippingBox);

    if (fColorAndWeight.a > LSR_EPSILON) {
        fColorAndWeight.rgb /= fColorAndWeight.a;
        fColorAndWeight.a *= fUpsampleLanczosWeightScale;
        Deringing(clippingBox, fColorAndWeight.rgb);
    } else {
        fColorAndWeight.w = 0;
    }

    return fColorAndWeight;
}

// Pulls the history color towards the clipping box of the current frame when it lies
// outside of it, and lowers the accumulation weight accordingly. Lock contribution and
// luma instability keep more of the unclamped history; the dilated reactive factor keeps
// less. `fThisFrameReactiveFactor` is unused.
void RectifyHistory(const AccumulationPassCommonParams params, RectificationBox clippingBox,
    inout vec3 fHistoryColorYCoCg, inout vec3 fAccumulation, float fLockContributionThisFrame,
    float fThisFrameReactiveFactor, float fLumaInstabilityFactor)
{
    const vec2 dsFactor = DownscaleFactor();
    float fScaleFactorInfluence = min(20.0f, pow(1.0f / (dsFactor.x * dsFactor.y), 3.0f));
    float fVelocityFactor = clamp(params.fHrVelocity / 20.0f, 0.0f, 1.0f);
    float fBoxScaleT = max(params.fDepthClipFactor, max(params.fAccumulationMask, fVelocityFactor));
    float fBoxScale = mix(fScaleFactorInfluence, 1.0f, fBoxScaleT);

    // Variance box around the mean, limited to the AABB of the neighbourhood.
    vec3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
    vec3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
    vec3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
    boxMin = max(clippingBox.aabbMin, boxMin);
    boxMax = min(clippingBox.aabbMax, boxMax);
    // boxMin = InverseTonemap(boxMin);
    // boxMax = InverseTonemap(boxMax);

    // The comparison against the box is done in RGB.
    vec3 colRgb = yCoCgToRgb(fHistoryColorYCoCg);

    if (any(greaterThan(boxMin, colRgb)) || any(greaterThan(colRgb, boxMax))) {
        vec3 fClampedHistoryColor = clamp(colRgb, boxMin, boxMax);
        vec3 fHistoryContribution = max(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;

        float fReactiveFactor = params.fDilatedReactiveFactor;
        float fReactiveContribution = 1.0 - pow(fReactiveFactor, 1.0 / 2.0);
        fHistoryContribution *= fReactiveContribution;

        colRgb = mix(fClampedHistoryColor, colRgb, saturate(fHistoryContribution));

        const vec3 fAccumulationMin = min(fAccumulation, vec3(0.1f));
        fAccumulation = mix(fAccumulationMin, fAccumulation, saturate(fHistoryContribution));
    }
    fHistoryColorYCoCg = rgbToYCoCg(colRgb);
}

// Blends the upsampled color into the history color. The blend factor is the upsample
// weight divided by the sum of accumulation and upsample weight.
void Accumulate(const AccumulationPassCommonParams params, inout vec3 fHistoryColorYCoCg, vec3 fAccumulation,
    vec4 fUpsampledColorAndWeight)
{
    vec3 newYCoCg = rgbToYCoCg(fUpsampledColorAndWeight.xyz);
    // Keep the denominator away from zero.
    fAccumulation = max(vec3(LSR_EPSILON), fAccumulation + fUpsampledColorAndWeight.www);
    vec3 a = fUpsampledColorAndWeight.www / fAccumulation;
    fHistoryColorYCoCg = mix(fHistoryColorYCoCg, newYCoCg, a);
}

// --- Output Color/Factor Preparation ---

// Converts a YCoCg color to RGB, divides by Exposure() and clamps negative values to zero.
vec3 UnprepareRgb(vec3 colorYCoCg)
{
    vec3 colorRGB = yCoCgToRgb(colorYCoCg);
    // colorRGB = InverseTonemap(colorRGB);
    colorRGB /= Exposure();
    return max(vec3(0.0), colorRGB);
}

// Reactive factor stored with the history for the next frame. A negative value marks the
// pixel as "in motion" (velocity * 10 >= 1); the magnitude is the factor itself.
float ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, float thisFrameReactiveFactor)
{
    float fNewFactor = min(0.99f, thisFrameReactiveFactor);
    fNewFactor = max(fNewFactor, mix(fNewFactor, 0.4f, clamp(params.fHrVelocity, 0.0f, 1.0f)));
    fNewFactor = max(fNewFactor * fNewFactor, max(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor));
    fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor;
    if (clamp(params.fHrVelocity * 10.0f, 0.0f, 1.0f) >= 1.0f) {
        // Keep the magnitude non-zero so the sign survives.
        fNewFactor = max(LSR_EPSILON, fNewFactor) * -1.0f;
    }
    return fNewFactor;
}

// Runs the complete accumulate pass for one display-resolution pixel and returns every
// value that has to be written out.
AccumulateOutputs AccumulatePass(ivec2 iPxHrPos)
{
    AccumulationPassCommonParams params = InitParams(iPxHrPos);

    vec3 fHistoryColorYCoCg = vec3(0.0);
    vec2 fLockStatus = vec2(0.0);
    InitializeNewLockSample(fLockStatus);
    float fTemporalReactiveFactor = 0.0f;
    bool bInMotionLastFrame = false;
    LockState lockState;
    lockState.newLock = false;
    lockState.wasLockedPrevFrame = false;

    initIsNewSample(params);
    bool bUseHistory = !params.bIsNewSample; //&& uPc.frameIndex > 0;

    // Zero-initialized because the struct is handed to inout parameters before every member
    // has been assigned.
    AccumulateOutputs results;
    results.fColorAndWeight = vec4(0.0);
    results.fLockStatus = vec2(0.0);
    results.fLumaHistory = vec4(0.0);
    results.fColor = vec3(0.0);

    if (bUseHistory) {
        ReprojectHistoryColor(params.fReprojectedHrUv, fHistoryColorYCoCg, fTemporalReactiveFactor, bInMotionLastFrame);
        lockState = ReprojectHistoryLockStatus(params.fReprojectedHrUv, iPxHrPos, fLockStatus);
    } else {
        // No history: only the new-lock request of this frame is relevant.
        lockState.newLock = (LoadNewLockRequest(iPxHrPos) > (127.0 / 255.0));
        lockState.wasLockedPrevFrame = false;
    }

    InitReactiveMaskFactors(params);
    InitDepthClipFactors(params);

    float fThisFrameReactiveFactor = max(params.fDilatedReactiveFactor, fTemporalReactiveFactor);

    float fLuminanceDiff = 0.0f;
    float fLockContributionThisFrame = 0.0f;
    UpdateLockStatus(
        params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);

    RectificationBox clippingBox;
    vec4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);

    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w, results);

    float fLumaInstabilityFactor =
        ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff, results);
    vec3 fAccumulation = ComputeBaseAccumulationWeight(
        params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);

    vec3 fFinalColorYCoCg;
    if (params.bIsNewSample) {
        // Nothing to blend with: take the upsampled color directly.
        fFinalColorYCoCg = rgbToYCoCg(fUpsampledColorAndWeight.xyz);
    } else {
        RectifyHistory(params, clippingBox, fHistoryColorYCoCg, fAccumulation, fLockContributionThisFrame,
            fThisFrameReactiveFactor, fLumaInstabilityFactor);
        Accumulate(params, fHistoryColorYCoCg, fAccumulation, fUpsampledColorAndWeight);
        fFinalColorYCoCg = fHistoryColorYCoCg;
    }

    vec3 fFinalColorLinearRGB = UnprepareRgb(fFinalColorYCoCg);

    float fOutputTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);

    results.fColorAndWeight = vec4(fFinalColorYCoCg, fOutputTemporalReactiveFactor);
    results.fColor = fFinalColorLinearRGB;

    return results;
}

layout(constant_id = 0) const uint CORE_POST_PROCESS_FLAGS = 0;

#define cTgs 8

layout(local_size_x = cTgs, local_size_y = cTgs, local_size_z = 1) in;
// One invocation per display-resolution pixel; invocations outside the display size exit
// without writing anything.
void main()
{
    const ivec2 iPxHrPos = ivec2(gl_GlobalInvocationID.xy);
    const ivec2 displaySize = DisplaySize();
    if (iPxHrPos.x >= displaySize.x || iPxHrPos.y >= displaySize.y) {
        return;
    }

    AccumulateOutputs results = AccumulatePass(iPxHrPos);

    // --- Write Outputs ---
    // History color in YCoCg (.rgb) + signed temporal reactive factor (.a)
    imageStore(out_HistoryColorReactive, iPxHrPos, results.fColorAndWeight);
    // .x = lock lifetime remaining (index 0), .y = lock temporal luma (index 1)
    imageStore(out_HistoryLockStatus, iPxHrPos, vec4(results.fLockStatus, 0.0, 0.0));
    imageStore(out_HistoryLuma, iPxHrPos, results.fLumaHistory);
    imageStore(out_FinalColor, iPxHrPos, vec4(results.fColor, 1.0));
    // Clear the new-lock request of this pixel after it has been read.
    imageStore(newLocksMask, iPxHrPos, vec4(0.0));
}