• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#version 460 core
2#extension GL_ARB_separate_shader_objects : enable
3#extension GL_ARB_shading_language_420pack : enable
4
5// Lume Super Resolution
6// Accumulate pass
7
8// includes
9#include "common/bloom_common.h"
10#include "render/shaders/common/render_color_conversion_common.h"
11#include "render/shaders/common/render_post_process_structs_common.h"
12
13// camera data
14struct DefaultCameraMatrixStruct {
15    mat4 view;
16    mat4 proj;
17    mat4 viewProj;
18
19    mat4 viewInv;
20    mat4 projInv;
21    mat4 viewProjInv;
22
23    mat4 viewPrevFrame;
24    mat4 projPrevFrame;
25    mat4 viewProjPrevFrame;
26
27    mat4 shadowViewProj;
28    mat4 shadowViewProjInv;
29
30    // .xy = jitter offset, .zw = jitter offset with baked screen size
31    vec4 jitter;
32    vec4 jitterPrevFrame;
33
34    // .xy = unique id (64-bit), .zw = layer mask (64 bit)
35    uvec4 indices;
36    // .x multi-view camera additional layer count, .yzw 3 multi-view camera indices
37    // yzw are packed, use unpack functions
38    uvec4 multiViewIndices;
39
40    vec4 frustumPlanes[2];
41
42    // .x environment count
43    uvec4 counts;
44    // padding to 256
45    uvec4 pad0;
46    mat4 matPad0;
47    mat4 matPad1;
48};
49
50// sets
51layout(set = 0, binding = 2) uniform sampler uSampler;
52
53layout(set = 0, binding = 0) uniform texture2D adjustedColorDepthClipTex; // rgba16f
54layout(set = 0, binding = 1) uniform texture2D dilatedReactiveMaskTex;    // rg16f
55layout(set = 0, binding = 3, r8) uniform image2D newLocksMask;            // r8
56layout(set = 0, binding = 4, rg16f) uniform image2D dilatedMotionTex;
57
58layout(set = 0, binding = 5) uniform texture2D historyColorReactiveTex; // rgba16f
59layout(set = 0, binding = 6) uniform texture2D historyLockStatusTex;    // rg16f
60layout(set = 0, binding = 7, rgba8) uniform image2D historyLumaTex;          // rgba16f
61
62layout(set = 0, binding = 8, rgba16f) uniform writeonly image2D out_HistoryColorReactive;
63layout(set = 0, binding = 9, rg16f) uniform writeonly image2D out_HistoryLockStatus;
64layout(set = 0, binding = 10, rgba8) uniform writeonly image2D out_HistoryLuma;
65
66layout(set = 0, binding = 11, rgba16f) uniform writeonly image2D out_FinalColor;
67layout(set = 0, binding = 13, r16f) uniform image2D luminanceTex; // r16
68
69// Push constants
70struct AccumulatePassPushConstant {
71    vec4 displaySizeInvSize;
72    vec4 viewportSizeInvSize;
73    float exposure;
74    uint frameIndex;
75    uint jitterSequenceLength;
76    float avgLanczosWeightPerFrame;
77    float maxAccumulationLanczosWeight;
78};
79
80struct AccumulateOutputs {
81    vec4 fColorAndWeight;
82    vec2 fLockStatus;
83    vec4 fLumaHistory;
84    vec3 fColor;
85};
86
87layout(push_constant, std430) uniform uPushConstantBlock
88{
89    AccumulatePassPushConstant uPc;
90};
91
92layout(set = 0, binding = 12, std140) uniform uCameraMatrices
93{
94    DefaultCameraMatrixStruct uCameras[16];
95};
96
97// --- Constants ---
98const float LSR_EPSILON = 1e-05f;
99const float LSR_FLT_MAX = 3.402823466e+38f;
100const int iLanczos2SampleCount = 9;
101const float fUpsampleLanczosWeightScale = 1.0f / 9.0f;
102
103const int LUMA_N_MINUS_1 = 0;
104const int LUMA_N_MINUS_2 = 1;
105const int LUMA_N_MINUS_3 = 2;
106const int LUMA_N_MINUS_4 = 3;
107
108const int LSR_LOCK_LIFETIME_REMAINING = 0;
109const int LSR_LOCK_TEMPORAL_LUMA = 1;
110
111// Luma instability variables
112const float kUnorm8Step     = 1.0 / 255.0;
113const float kInvUnorm8Step  = 255.0;
114
115//---------------- Utility & Helper Functions ------------------//
116
117ivec2 DisplaySize()
118{
119    return ivec2(uPc.displaySizeInvSize.xy);
120}
121vec2 DisplaySizeVec2()
122{
123    return uPc.displaySizeInvSize.xy;
124}
125vec2 InvDisplaySize()
126{
127    return uPc.displaySizeInvSize.zw;
128}
129ivec2 RenderSize()
130{
131    return ivec2(uPc.viewportSizeInvSize.xy);
132}
133vec2 RenderSizeVec2()
134{
135    return uPc.viewportSizeInvSize.xy;
136}
137vec2 InvRenderSize()
138{
139    return uPc.viewportSizeInvSize.zw;
140}
141vec2 UpscaleFactor()
142{
143    return DisplaySizeVec2() * InvRenderSize();
144}
145vec2 DownscaleFactor()
146{
147    return uPc.viewportSizeInvSize.xy * uPc.displaySizeInvSize.zw;
148}
149float Exposure()
150{
151    return 0.7f;
152}
153float PreviousFramePreExposure()
154{
155    return 1.0f;
156}
157uint FrameIndex()
158{
159    return 1;
160}
161vec2 JitterPixels()
162{
163    return uCameras[0].jitter.xy;
164}
165vec2 JitterUV()
166{
167    return uCameras[0].jitter.zw;
168}
169// vec2 JitterUV() { return vec2(0); }
170// vec2 JitterPrevUV() { return vec2(0); }
171
172uint JitterSequenceLength()
173{
174    return 16;
175}
176float AverageLanczosWeightPerFrame()
177{
178    return 1.0 / 9.0;
179}
180float MaxAccumulationLanczosWeight()
181{
182    return 0.95;
183}
184
185float saturate(float x)
186{
187    return clamp(x, 0.0, 1.0);
188}
189vec2 saturate(vec2 x)
190{
191    return clamp(x, 0.0, 1.0);
192}
193
194vec3 saturate(vec3 x)
195{
196    return clamp(x, 0.0, 1.0);
197}
198
199bool IsUvInside(vec2 uv)
200{
201    return all(greaterThanEqual(uv, vec2(0.0))) && all(lessThanEqual(uv, vec2(1.0)));
202}
203
204vec2 ClampUv(vec2 uv)
205{
206    return clamp(uv, vec2(0.0), vec2(1.0));
207}
208
209vec3 Tonemap(vec3 rgb)
210{
211    return rgb / (max(max(0.f, rgb.r), max(rgb.g, rgb.b)) + 1.f).xxx;
212}
213
214vec3 InverseTonemap(vec3 rgb)
215{
216    return rgb / max(1e-0001, 1.f - max(rgb.r, max(rgb.g, rgb.b))).xxx;
217}
218
219// --- Sampling Functions ---
220
221struct CatmullRomSamples9Tap {
222    vec2 UV[3];
223    vec2 Weight[3];
224    float FinalMultiplier;
225};
226
227CatmullRomSamples9Tap Get2DCatmullRom9Kernel(vec2 uv, vec2 size, vec2 invSize)
228{
229    CatmullRomSamples9Tap catmullSamples;
230    vec2 samplePos = uv * size;
231    vec2 texPos1 = floor(samplePos - 0.5f) + 0.5f;
232    vec2 f = samplePos - texPos1;
233
234    vec2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f));
235    vec2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f);
236    vec2 w2 = f * (0.5f + f * (2.0f - 1.5f * f));
237    vec2 w3 = f * f * (-0.5f + 0.5f * f);
238
239    catmullSamples.Weight[0] = w0;
240    catmullSamples.Weight[1] = w1 + w2;
241    catmullSamples.Weight[2] = w3;
242
243    vec2 offset12 = w2 / (w1 + w2);
244
245    // Compute the final UV coordinates we'll use for sampling the texture
246    catmullSamples.UV[0] = texPos1 - 1.0;
247    catmullSamples.UV[1] = texPos1 + 2.0;
248    catmullSamples.UV[2] = texPos1 + offset12;
249
250    catmullSamples.UV[0] *= invSize;
251    catmullSamples.UV[1] *= invSize;
252    catmullSamples.UV[2] *= invSize;
253    return catmullSamples;
254}
255
256// 1D Lanczos 2 Kernel function
257float Lanczos2(float x)
258{
259    x = abs(x);
260    if (x < LSR_EPSILON)
261        return 1.0;
262    if (x >= 2.0)
263        return 0.0;
264
265    float PI = 3.141592653589793;
266    float pix = PI * x;
267    float pix2 = PI * x * 0.5;
268
269    float s1 = sin(pix) / max(pix, LSR_EPSILON);
270    float s2 = sin(pix2) / max(pix2, LSR_EPSILON);
271
272    return s1 * s2;
273}
274
275vec4 SampleHistoryColorKernel(vec2 fUvSample)
276{
277    CatmullRomSamples9Tap samples = Get2DCatmullRom9Kernel(fUvSample, DisplaySize(), InvDisplaySize());
278
279    vec4 fColor = vec4(0.0f);
280
281    vec4 fColor00 = texture(sampler2D(historyColorReactiveTex, uSampler), samples.UV[0]);
282    fColor += fColor00 * samples.Weight[0].x * samples.Weight[0].y;
283    vec4 fColor20 = texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[2].x, samples.UV[0].y));
284    fColor += fColor20 * samples.Weight[1].x * samples.Weight[0].y;
285    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[1].x, samples.UV[0].y)) *
286              samples.Weight[2].x * samples.Weight[0].y;
287
288    vec4 fColor02 = texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[0].x, samples.UV[2].y));
289    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[0].x, samples.UV[2].y)) *
290              samples.Weight[0].x * samples.Weight[1].y;
291    vec4 fColor22 = texture(sampler2D(historyColorReactiveTex, uSampler), samples.UV[2]);
292    fColor += fColor22 * samples.Weight[1].x * samples.Weight[1].y;
293    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[1].x, samples.UV[2].y)) *
294              samples.Weight[2].x * samples.Weight[1].y;
295
296    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[0].x, samples.UV[1].y)) *
297              samples.Weight[0].x * samples.Weight[2].y;
298    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), vec2(samples.UV[2].x, samples.UV[1].y)) *
299              samples.Weight[1].x * samples.Weight[2].y;
300    fColor += texture(sampler2D(historyColorReactiveTex, uSampler), samples.UV[1]) * samples.Weight[2].x *
301              samples.Weight[2].y;
302
303    const vec4 fDeringingSamples[4] = { fColor00, fColor20, fColor02, fColor22 };
304
305    vec4 fDeringingMin = fDeringingSamples[0];
306    vec4 fDeringingMax = fDeringingSamples[0];
307
308    for (int iSampleIndex = 1; iSampleIndex < 4; ++iSampleIndex) {
309        fDeringingMin = min(fDeringingMin, fDeringingSamples[iSampleIndex]);
310        fDeringingMax = max(fDeringingMax, fDeringingSamples[iSampleIndex]);
311    }
312    fColor = clamp(fColor, fDeringingMin, fDeringingMax);
313
314    return fColor;
315}
316
317vec4 SampleAdjustedColorDepthClip(vec2 uv_render_jittered)
318{
319    return texture(sampler2D(adjustedColorDepthClipTex, uSampler), uv_render_jittered);
320}
321vec2 SampleDilatedReactiveMasks(vec2 uv_render_jittered)
322{
323    if (!IsUvInside(uv_render_jittered)) {
324        return vec2(0.0);
325    }
326    return texture(sampler2D(dilatedReactiveMaskTex, uSampler), uv_render_jittered).rg;
327}
328
329float LoadLuminance(ivec2 render_coord)
330{
331    return imageLoad(luminanceTex, render_coord).r;
332}
333
334float LoadNewLockRequest(ivec2 coord_display)
335{
336    return imageLoad(newLocksMask, coord_display).r;
337}
338vec2 LoadMotionVector(vec2 uv_render)
339{
340    ivec2 coord = ivec2(uv_render * RenderSize());
341    return imageLoad(dilatedMotionTex, coord).rg;
342}
343vec2 SampleHistoryLockStatus(vec2 uv_display)
344{
345    return texture(sampler2D(historyLockStatusTex, uSampler), uv_display).rg;
346}
347vec4 SampleHistoryLuma(vec2 uv_display)
348{
349    ivec2 pixel = ivec2(uv_display * DisplaySize() + 0.5);
350    return imageLoad(historyLumaTex, pixel);
351}
352
353// --- Data Structs ---
354struct LockState {
355    bool newLock;
356    bool wasLockedPrevFrame;
357};
358
359struct RectificationBox {
360    vec3 boxCenter;         // Weighted average color (Linear RGB)
361    vec3 boxVec;            // Standard deviation (Linear RGB)
362    vec3 aabbMin;           // Min bounds (Linear RGB)
363    vec3 aabbMax;           // Max bounds (Linear RGB)
364    float fBoxCenterWeight; // Total weight used for averaging
365    // Add sum of squares for proper variance calculation
366    vec3 weightedColorSum;
367    vec3 weightedColorSqSum;
368};
369
370struct AccumulationPassCommonParams {
371    ivec2 iPxHrPos;    // Integer coordinate at Display Res
372    vec2 fHrUv;        // Float UV at Display Res (pixel center)
373    vec2 fLrUv_Sample; // Float UV for sampling Render Res textures (with jitter)
374    vec2 fMotionVector;
375    float fHrVelocity;            // Pixel space velocity magnitude (Display Res)
376    vec2 fReprojectedHrUv;        // UV in previous frame's Display Res space
377    bool bIsExistingSample;       // Was fReprojectedHrUv on screen?
378    bool bIsNewSample;            // Should discard history? (First frame or !bIsExistingSample)
379    float fDilatedReactiveFactor; // Reactive mask X value
380    float fAccumulationMask;      // Reactive mask Y value (Composition/Motion)
381    float fDepthClipFactor;       // Depth clip value
382};
383
384float Lanczos2ApproxSqNoClamp(float x2)
385{
386    float a = (2.0f / 5.0f) * x2 - 1;
387    float b = (1.0f / 4.0f) * x2 - 1;
388    return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b);
389}
390
391float Lanczos2ApproxSq(float x2)
392{
393    x2 = min(x2, 4.0f);
394    return Lanczos2ApproxSqNoClamp(x2);
395}
396
397void RectificationBoxReset(inout RectificationBox rectificationBox)
398{
399    rectificationBox.fBoxCenterWeight = 0.0f;
400    rectificationBox.boxCenter = vec3(0.0);
401    rectificationBox.boxVec = vec3(0.0);
402    rectificationBox.aabbMin = vec3(LSR_FLT_MAX);
403    rectificationBox.aabbMax = vec3(-LSR_FLT_MAX);
404}
405
406void RectificationBoxAddInitialSample(
407    inout RectificationBox rectificationBox, const vec3 colorSample, const float fSampleWeight)
408{
409    rectificationBox.aabbMin = colorSample;
410    rectificationBox.aabbMax = colorSample;
411
412    vec3 weightedSample = colorSample * fSampleWeight;
413    rectificationBox.boxCenter = weightedSample;
414    rectificationBox.boxVec = colorSample * weightedSample;
415    rectificationBox.fBoxCenterWeight = fSampleWeight;
416}
417
418void RectificationBoxAddSample(
419    bool bInitialSample, inout RectificationBox rectificationBox, const vec3 colorSample, const float fSampleWeight)
420{
421    if (bInitialSample) {
422        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
423    } else {
424        rectificationBox.aabbMin = min(rectificationBox.aabbMin, colorSample);
425        rectificationBox.aabbMax = max(rectificationBox.aabbMax, colorSample);
426
427        vec3 weightedSample = colorSample * fSampleWeight;
428        rectificationBox.boxCenter += weightedSample;
429        rectificationBox.boxVec += colorSample * weightedSample;
430        rectificationBox.fBoxCenterWeight += fSampleWeight;
431    }
432}
433
434void RectificationBoxComputeVarianceBoxData(inout RectificationBox rectificationBox)
435{
436    rectificationBox.fBoxCenterWeight =
437        (abs(rectificationBox.fBoxCenterWeight) > float(LSR_EPSILON) ? rectificationBox.fBoxCenterWeight : 1.0);
438    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
439    rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;
440
441    vec3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
442    rectificationBox.boxVec = stdDev;
443}
444
445// --- Reprojection Related ---
446vec2 GetMotionVector(ivec2 iPxHrPos, vec2 fHrUv)
447{
448    vec2 uv_render = fHrUv;// * DownscaleFactor();
449    uv_render = ClampUv(uv_render);
450    if (!IsUvInside(uv_render)) {
451        return vec2(0.0);
452    }
453    return LoadMotionVector(uv_render);
454}
455
456float GetPxHrVelocity(vec2 motionVector)
457{
458    float vel = length(motionVector);
459    if (vel < 0.01)
460        vel = 0;
461    else
462        vel = length(motionVector * DisplaySizeVec2());
463    return vel;
464}
465
466void ComputeReprojectedUVs(vec2 fHrUv, vec2 motionVector, out vec2 fReprojectedHrUv, out bool bIsExistingSample)
467{
468    fReprojectedHrUv = fHrUv - motionVector;
469    bIsExistingSample = IsUvInside(fReprojectedHrUv);
470}
471
472void ReprojectHistoryColor(
473    vec2 fReprojectedHrUv, out vec3 historyColorYCoCg, out float temporalReactiveFactor, out bool bInMotionLastFrame)
474{
475    vec4 historySample = SampleHistoryColorKernel(fReprojectedHrUv);
476
477    float alpha = historySample.a;
478    bInMotionLastFrame = (alpha < 0.0);
479    temporalReactiveFactor = abs(alpha);
480
481    historyColorYCoCg = historySample.rgb;
482}
483
484LockState ReprojectHistoryLockStatus(vec2 fReprojectedHrUv, ivec2 iPxHrPos, out vec2 reprojectedLockStatus)
485{
486    LockState state;
487
488    float newLockRequest = LoadNewLockRequest(iPxHrPos);
489    state.newLock = (newLockRequest > (127.0 / 255.0));
490
491    reprojectedLockStatus = SampleHistoryLockStatus(fReprojectedHrUv);
492
493    // Check if previous frame was locked based on Lifetime Remaining
494    state.wasLockedPrevFrame = (reprojectedLockStatus[LSR_LOCK_LIFETIME_REMAINING] > 0.0f);
495
496    return state;
497}
498
499AccumulationPassCommonParams InitParams(ivec2 iPxHrPos)
500{
501    AccumulationPassCommonParams params;
502    params.iPxHrPos = iPxHrPos;
503    params.fHrUv = (vec2(iPxHrPos) + 0.5) * InvDisplaySize();
504    vec2 jitterUV = JitterUV();
505
506    vec2 renderUvBase = params.fHrUv;
507    params.fLrUv_Sample = renderUvBase + jitterUV;
508    params.fLrUv_Sample = clamp(params.fLrUv_Sample, vec2(0), vec2(1));
509
510    vec2 sampledMV = GetMotionVector(iPxHrPos, params.fHrUv); // Samples render-res MV texture
511    params.fMotionVector = sampledMV / DownscaleFactor();
512    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
513    ComputeReprojectedUVs(params.fHrUv, params.fMotionVector, params.fReprojectedHrUv, params.bIsExistingSample);
514    // Init factors
515    params.fDilatedReactiveFactor = 0.0;
516    params.fAccumulationMask = 0.0;
517    params.fDepthClipFactor = 0.0;
518    params.bIsNewSample = false;
519    return params;
520}
521
522void InitReactiveMaskFactors(inout AccumulationPassCommonParams params)
523{
524    vec2 dilatedMasks = SampleDilatedReactiveMasks(params.fLrUv_Sample);
525    params.fDilatedReactiveFactor = dilatedMasks.x;
526    params.fAccumulationMask = dilatedMasks.y;
527}
528void InitDepthClipFactors(inout AccumulationPassCommonParams params)
529{
530    params.fDepthClipFactor = saturate(SampleAdjustedColorDepthClip(params.fLrUv_Sample).a);
531}
532void initIsNewSample(inout AccumulationPassCommonParams params)
533{
534    bool bIsResetFrame = (0 == FrameIndex());
535    params.bIsNewSample = (!params.bIsExistingSample || bIsResetFrame);
536}
537
538// --- Lock Status Update ---
539void KillLock(inout vec2 lockStatus)
540{
541    lockStatus[LSR_LOCK_LIFETIME_REMAINING] = 0;
542}
543void InitializeNewLockSample(out vec2 lockStatus)
544{
545    lockStatus = vec2(0.0);
546}
547
548float MinDividedByMax(float a, float b)
549{
550    return min(a, b) / max(max(a, b), LSR_EPSILON);
551}
552
553float GetShadingChangeLuma(ivec2 iPxHrPos, vec2 fUvCoord)
554{
555    ivec2 size = imageSize(luminanceTex);
556    ivec2 coord = clamp(ivec2(fUvCoord * size), ivec2(0), size - ivec2(1));
557    return LoadLuminance(coord);
558}
559
560void UpdateLockStatus(const AccumulationPassCommonParams params, inout float fReactiveFactor, LockState lockState,
561    inout vec2 fLockStatus, out float fLockContributionThisFrame, out float fLuminanceDiff)
562{
563    float fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
564
565    fLockStatus[LSR_LOCK_TEMPORAL_LUMA] =
566        (fLockStatus[LSR_LOCK_TEMPORAL_LUMA] == 0.0) ? fShadingChangeLuma : fLockStatus[LSR_LOCK_TEMPORAL_LUMA];
567
568    float fPrevLuma = fLockStatus[LSR_LOCK_TEMPORAL_LUMA];
569
570    fLuminanceDiff = 1.0 - MinDividedByMax(fPrevLuma, fShadingChangeLuma);
571
572    if (lockState.newLock) {
573        fLockStatus[LSR_LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
574        fLockStatus[LSR_LOCK_LIFETIME_REMAINING] = (fLockStatus[LSR_LOCK_LIFETIME_REMAINING] != 0.0) ? 2.0 : 1.0;
575    } else if (fLockStatus[LSR_LOCK_LIFETIME_REMAINING] <= 1.0) {
576        fLockStatus[LSR_LOCK_TEMPORAL_LUMA] = mix(fLockStatus[LSR_LOCK_TEMPORAL_LUMA], fShadingChangeLuma, 0.5);
577    } else {
578        if (fLuminanceDiff > 0.2)
579            KillLock(fLockStatus);
580    }
581
582    fReactiveFactor = max(fReactiveFactor, saturate((fLuminanceDiff - 0.1) * 10.0));
583
584    fLockStatus[LSR_LOCK_LIFETIME_REMAINING] *= (1.0 - fReactiveFactor);
585    fLockStatus[LSR_LOCK_LIFETIME_REMAINING] *= saturate(1.0 - params.fAccumulationMask);
586    fLockStatus[LSR_LOCK_LIFETIME_REMAINING] *= float(params.fDepthClipFactor < 0.1);
587
588    float lifetimeC = saturate(fLockStatus[LSR_LOCK_LIFETIME_REMAINING] - 1.0);
589    float shadingC = saturate(MinDividedByMax(fLockStatus[LSR_LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
590    fLockContributionThisFrame = saturate(saturate(lifetimeC * 4.0) * shadingC);
591}
592
593// --- Luma Instability ---
594float ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params,
595                                   RectificationBox clippingBox,
596                                   float thisFrameReactiveFactor,
597                                   float luminanceDiff,
598                                   inout AccumulateOutputs result)
599{
600    float currentLuma = clippingBox.boxCenter.x;            // linear 0-1
601    currentLuma = saturate(currentLuma);
602    currentLuma = floor(currentLuma * kInvUnorm8Step + 0.5) * kUnorm8Step;
603
604    bool sampleHistory =
605        (max(max(params.fDepthClipFactor,
606                 params.fAccumulationMask),
607                 luminanceDiff) < 0.1) &&
608        (!params.bIsNewSample);
609
610    vec4 history = sampleHistory
611                 ? SampleHistoryLuma(params.fReprojectedHrUv)
612                 : vec4(0.0);
613
614    float d0      = currentLuma - history[LUMA_N_MINUS_1];
615    float minDiff = abs(d0);
616
617    if (minDiff >= kUnorm8Step)
618    {
619        for (int i = LUMA_N_MINUS_2; i <= LUMA_N_MINUS_4; ++i)
620        {
621            float di = currentLuma - history[i];
622            if (sign(d0) == sign(di))
623                minDiff = min(minDiff, abs(di));
624        }
625
626        float boxFactor =
627            pow(saturate(clippingBox.boxVec.x / 0.1), 6.0);
628
629        float instability = float(minDiff != abs(d0)) * boxFactor;
630        instability       = float(instability > kUnorm8Step);
631
632        instability      *= 1.0 - max(params.fAccumulationMask,
633                                      pow(thisFrameReactiveFactor, 1.0 / 6.0));
634
635        history.rgb = history.gba;
636        history.a   = currentLuma;
637
638        result.fLumaHistory = history;
639        return instability * float(history[LUMA_N_MINUS_4] != 0.0);
640    }
641
642    history.rgb = history.gba;
643    history.a   = currentLuma;
644    result.fLumaHistory = history;
645    return 0.0;
646}
647
648void FinalizeLockStatus(
649    const AccumulationPassCommonParams params, vec2 fLockStatus, float fUpsampledWeight, inout AccumulateOutputs result)
650{
651    vec2 hrMotionVectorUV = params.fMotionVector;
652    vec2 fEstimatedUvNextFrame = params.fHrUv + hrMotionVectorUV;
653
654    if (!IsUvInside(fEstimatedUvNextFrame)) {
655        KillLock(fLockStatus);
656    } else {
657        const float fAverageWeightPerFrame = (1.0 / 9.0) * 0.74;
658
659        float fLifetimeDecreaseMaxTotalWeight =
660            max(LSR_EPSILON, float(JitterSequenceLength()) * fAverageWeightPerFrame);
661
662        float fLifetimeDecrease = (fUpsampledWeight / fLifetimeDecreaseMaxTotalWeight);
663
664        fLockStatus[LSR_LOCK_LIFETIME_REMAINING] =
665            max(0.0f, fLockStatus[LSR_LOCK_LIFETIME_REMAINING] - fLifetimeDecrease);
666    }
667
668    result.fLockStatus = fLockStatus;
669}
670
671// --- Accumulation Weight --
672//-------------------------------------------------------------------------------------------------
673// Upsample Functions (Adapted from ARM ASR)
674//-------------------------------------------------------------------------------------------------
675void Deringing(RectificationBox clippingBox, inout vec3 fColor)
676{
677    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
678}
679
680float GetUpsampleLanczosWeight(vec2 fSrcSampleOffset, float fKernelWeight)
681{
682    vec2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight;
683    return Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
684}
685
686float ComputeMaxKernelWeight()
687{
688    const float fKernelSizeBias = 1.0f;
689    vec2 dsFactor = DownscaleFactor();
690    vec2 invDsFactor = (dsFactor.x > LSR_EPSILON && dsFactor.y > LSR_EPSILON) ? (vec2(1.0) / dsFactor) : vec2(1.0);
691    float fKernelWeight = 1.0f + (invDsFactor.x - 1.0f) * fKernelSizeBias;
692    return min(1.99f, fKernelWeight);
693}
694
695vec3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, float fThisFrameReactiveFactor,
696    bool bInMotionLastFrame, float fUpsampledWeight, LockState lockState)
697{
698    float fBaseAccumulation = MaxAccumulationLanczosWeight() * float(params.bIsExistingSample) *
699                              (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);
700
701    float motionFactor1 = max(float(bInMotionLastFrame), saturate(params.fHrVelocity * 10.0f));
702    fBaseAccumulation = min(fBaseAccumulation, mix(fBaseAccumulation, fUpsampledWeight * 10.0f, motionFactor1));
703
704    float motionFactor2 = saturate(params.fHrVelocity / 20.0f);
705    fBaseAccumulation = min(fBaseAccumulation, mix(fBaseAccumulation, fUpsampledWeight, motionFactor2));
706
707    return vec3(fBaseAccumulation);
708}
709
710vec4 ComputeUpsampledColorAndWeight(
711    const AccumulationPassCommonParams params, inout RectificationBox clippingBox, float fReactiveFactor)
712{
713    vec2 fDstOutputPos = vec2(params.iPxHrPos) + vec2(0.5);
714    vec2 fSrcOutputPos = fDstOutputPos * DownscaleFactor();
715    ivec2 iSrcInputPos = ivec2(floor(fSrcOutputPos));
716    vec2 fSrcUnjitteredPos = vec2(iSrcInputPos) + vec2(0.5);
717    vec2 fBaseSampleOffset = fSrcUnjitteredPos - fSrcOutputPos;
718
719    float fKernelReactiveFactor = max(fReactiveFactor, float(params.bIsNewSample));
720    float fKernelBiasMax = ComputeMaxKernelWeight() * (1.0 - fKernelReactiveFactor);
721    float fKernelBiasMin = max(1.0, (1.0 + fKernelBiasMax) * 0.3);
722    float fKernelBiasFactor = max(0.0, max(0.25 * params.fDepthClipFactor, fKernelReactiveFactor));
723    float fKernelBias = mix(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);
724
725    float fRectificationCurveBias = mix(-2.0, -3.0, clamp(params.fHrVelocity / 50.0, 0.0, 1.0));
726
727    vec4 fColorAndWeight = vec4(0.0);
728
729    RectificationBoxReset(clippingBox);
730
731    const ivec2 sampleOffsets[iLanczos2SampleCount] = { ivec2(-1, -1), ivec2(0, -1), ivec2(1, -1), ivec2(-1, 0),
732        ivec2(0, 0), ivec2(1, 0), ivec2(-1, 1), ivec2(0, 1), ivec2(1, 1) };
733    vec3 fSamplesRGB[iLanczos2SampleCount];
734
735    for (int idx = 0; idx < iLanczos2SampleCount; idx++) {
736        ivec2 neighborCoord = iSrcInputPos + sampleOffsets[idx];
737        vec2 neighborUV_unjittered = (vec2(neighborCoord) + 0.5) * InvRenderSize();
738        vec2 neighborUV_sampling = neighborUV_unjittered - JitterUV();
739        vec3 rgb = yCoCgToRgb(SampleAdjustedColorDepthClip(ClampUv(neighborUV_unjittered)).rgb);
740        fSamplesRGB[idx] = InverseTonemap(rgb);
741    }
742
743    for (int idx = 0; idx < iLanczos2SampleCount; idx++) {
744        vec2 fSrcSampleOffsetPixels = fBaseSampleOffset + vec2(sampleOffsets[idx]);
745        float fSampleWeight = GetUpsampleLanczosWeight(fSrcSampleOffsetPixels, fKernelBias);
746        fColorAndWeight.rgb += fSamplesRGB[idx] * fSampleWeight;
747        fColorAndWeight.a += fSampleWeight;
748        float fBoxSampleWeight = exp(fRectificationCurveBias * dot(fSrcSampleOffsetPixels, fSrcSampleOffsetPixels));
749        RectificationBoxAddSample(idx == 0, clippingBox, fSamplesRGB[idx], fBoxSampleWeight);
750    }
751
752    RectificationBoxComputeVarianceBoxData(clippingBox);
753
754    if (fColorAndWeight.a > LSR_EPSILON) {
755        fColorAndWeight.rgb /= fColorAndWeight.a;
756        fColorAndWeight.a *= fUpsampleLanczosWeightScale;
757        Deringing(clippingBox, fColorAndWeight.rgb);
758    } else {
759        fColorAndWeight.w = 0;
760    }
761
762    return fColorAndWeight;
763}
764
765void RectifyHistory(const AccumulationPassCommonParams params, RectificationBox clippingBox,
766    inout vec3 fHistoryColorYCoCg, inout vec3 fAccumulation, float fLockContributionThisFrame,
767    float fThisFrameReactiveFactor, float fLumaInstabilityFactor)
768{
769    float fScaleFactorInfluence = min(20.0f, pow(1.0f / (DownscaleFactor().x * DownscaleFactor().y), 3.0f));
770    float fVelocityFactor = clamp(params.fHrVelocity / 20.0f, 0.0f, 1.0f);
771    float fBoxScaleT = max(params.fDepthClipFactor, max(params.fAccumulationMask, fVelocityFactor));
772    float fBoxScale = mix(fScaleFactorInfluence, 1.0f, fBoxScaleT);
773
774    vec3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
775    vec3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
776    vec3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
777    boxMin = max(clippingBox.aabbMin, boxMin);
778    boxMax = min(clippingBox.aabbMax, boxMax);
779    //boxMin = InverseTonemap(boxMin);
780    //boxMax = InverseTonemap(boxMax);
781    vec3 colRgb = yCoCgToRgb(fHistoryColorYCoCg);
782
783    if (any(greaterThan(boxMin, colRgb)) || any(greaterThan(colRgb, boxMax))) {
784        vec3 fClampedHistoryColor = clamp(colRgb, boxMin, boxMax);
785        vec3 fHistoryContribution = max(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
786
787        float fReactiveFactor = params.fDilatedReactiveFactor;
788        float fReactiveContribution = 1.0 - pow(fReactiveFactor, 1.0 / 2.0);
789        fHistoryContribution *= fReactiveContribution;
790
791        colRgb = mix(fClampedHistoryColor, colRgb, saturate(fHistoryContribution));
792
793        const vec3 fAccumulationMin = min(fAccumulation, vec3(0.1f));
794        fAccumulation = mix(fAccumulationMin, fAccumulation, saturate(fHistoryContribution));
795    }
796    fHistoryColorYCoCg = rgbToYCoCg(colRgb);
797}
798
799void Accumulate(const AccumulationPassCommonParams params, inout vec3 fHistoryColorYCoCg, vec3 fAccumulation,
800    vec4 fUpsampledColorAndWeight)
801{
802    vec3 newYCoCg = rgbToYCoCg(fUpsampledColorAndWeight.xyz);
803    fAccumulation  = max(vec3(LSR_EPSILON), fAccumulation + fUpsampledColorAndWeight.www);
804    vec3 a  = fUpsampledColorAndWeight.www / fAccumulation;
805    fHistoryColorYCoCg = mix(fHistoryColorYCoCg, newYCoCg, a);
806}
807
808// --- Output Color/Factor Preparation ---
809vec3 UnprepareRgb(vec3 colorYCoCg)
810{
811    vec3 colorRGB = yCoCgToRgb(colorYCoCg);
812    //colorRGB = InverseTonemap(colorRGB);
813    colorRGB /= Exposure();
814    return max(vec3(0.0), colorRGB);
815}
816
817float ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, float thisFrameReactiveFactor)
818{
819    float fNewFactor = min(0.99f, thisFrameReactiveFactor);
820    fNewFactor = max(fNewFactor, mix(fNewFactor, 0.4f, clamp(params.fHrVelocity, 0.0f, 1.0f)));
821    fNewFactor = max(fNewFactor * fNewFactor, max(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor));
822    fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor;
823    if (clamp(params.fHrVelocity * 10.0f, 0.0f, 1.0f) >= 1.0f) {
824        fNewFactor = max(LSR_EPSILON, fNewFactor) * -1.0f;
825    }
826    return fNewFactor;
827}
828
829AccumulateOutputs AccumulatePass(ivec2 iPxHrPos)
830{
831    AccumulationPassCommonParams params = InitParams(iPxHrPos);
832
833    vec3 fHistoryColorYCoCg = vec3(0.0);
834    vec2 fLockStatus = vec2(0.0);
835    InitializeNewLockSample(fLockStatus);
836    float fTemporalReactiveFactor = 0.0f;
837    bool bInMotionLastFrame = false;
838    LockState lockState;
839    lockState.newLock = false;
840    lockState.wasLockedPrevFrame = false;
841
842    initIsNewSample(params);
843    bool bUseHistory = !params.bIsNewSample; //&& uPc.frameIndex > 0;
844
845    AccumulateOutputs results;
846
847    if (bUseHistory) {
848        ReprojectHistoryColor(params.fReprojectedHrUv, fHistoryColorYCoCg, fTemporalReactiveFactor, bInMotionLastFrame);
849        lockState = ReprojectHistoryLockStatus(params.fReprojectedHrUv, iPxHrPos, fLockStatus);
850    } else {
851        lockState.newLock = (LoadNewLockRequest(iPxHrPos) > (127.0 / 255.0));
852        lockState.wasLockedPrevFrame = false;
853    }
854
855    InitReactiveMaskFactors(params);
856    InitDepthClipFactors(params);
857
858    float fThisFrameReactiveFactor = max(params.fDilatedReactiveFactor, fTemporalReactiveFactor);
859
860    float fLuminanceDiff = 0.0f;
861    float fLockContributionThisFrame = 0.0f;
862    UpdateLockStatus(
863        params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);
864
865    RectificationBox clippingBox;
866    vec4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);
867
868    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w, results);
869
870    float fLumaInstabilityFactor =
871        ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff, results);
872    vec3 fAccumulation = ComputeBaseAccumulationWeight(
873        params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
874
875    vec3 fFinalColorYCoCg;
876    if (params.bIsNewSample) {
877        fFinalColorYCoCg = rgbToYCoCg(fUpsampledColorAndWeight.xyz);
878    } else {
879        RectifyHistory(params, clippingBox, fHistoryColorYCoCg, fAccumulation, fLockContributionThisFrame,
880            fThisFrameReactiveFactor, fLumaInstabilityFactor);
881        Accumulate(params, fHistoryColorYCoCg, fAccumulation, fUpsampledColorAndWeight);
882        fFinalColorYCoCg = fHistoryColorYCoCg;
883    }
884
885    vec3 fFinalColorLinearRGB = UnprepareRgb(fFinalColorYCoCg);
886
887    float fOutputTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);
888
889    results.fColorAndWeight = vec4(fFinalColorYCoCg, fOutputTemporalReactiveFactor);
890    results.fColor = fFinalColorLinearRGB;
891
892    return results;
893}
894
895layout(constant_id = 0) const uint CORE_POST_PROCESS_FLAGS = 0;
896
897#define cTgs 8
898
899layout(local_size_x = cTgs, local_size_y = cTgs, local_size_z = 1) in;
900void main()
901{
902    const ivec2 iPxHrPos = ivec2(gl_GlobalInvocationID.xy);
903    const ivec2 displaySize = DisplaySize();
904    if (iPxHrPos.x >= displaySize.x || iPxHrPos.y >= displaySize.y) {
905        return;
906    }
907
908    AccumulateOutputs results = AccumulatePass(iPxHrPos);
909
910    // --- Write Outputs ---
911    imageStore(out_HistoryColorReactive, iPxHrPos, results.fColorAndWeight);          // Store YCoCg + Reactive
912    imageStore(out_HistoryLockStatus, iPxHrPos, vec4(results.fLockStatus, 0.0, 0.0)); // Store Trust(1), Lifetime(0)
913    imageStore(out_HistoryLuma, iPxHrPos, results.fLumaHistory);
914    imageStore(out_FinalColor, iPxHrPos, vec4(results.fColor, 1.0));
915    imageStore(newLocksMask, iPxHrPos, vec4(0.0));
916}