#version 450 core
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable

// includes

#include "render/shaders/common/render_color_conversion_common.h"
#include "render/shaders/common/render_post_process_common.h"
#include "render/shaders/common/render_tonemap_common.h"

// sets

layout(set = 0, binding = 0) uniform texture2D uDepth;
layout(set = 0, binding = 1) uniform texture2D uColor;
layout(set = 0, binding = 2) uniform texture2D uVelocity;
layout(set = 0, binding = 3) uniform texture2D uHistory;
layout(set = 0, binding = 4) uniform sampler uSampler;

layout(push_constant, std430) uniform uPostProcessPushConstant
{
    LocalPostProcessPushConstantStruct uPc;
};

// in / out

layout(location = 0) in vec2 inUv;

layout(location = 0) out vec4 outColor;

// NOTE: cannot be used (remove if not used for any input)
#define ENABLE_INPUT_ATTACHMENTS 0

#define QUALITY_LOW 0
#define QUALITY_MED 1
#define QUALITY_HIGH 2

// if the magnitude of the velocity vector is greater than
// the threshold, the pixel is considered to be in motion.
#define STATIONARY_VELOCITY_THRESHOLD 0.001
// used to detect if the current pixel is an edge
#define DEPTH_DIFF 0.0005
// keeps the division in ClipAabb finite when the box is degenerate (min == max)
#define CLIP_AABB_EPSILON 0.00000001

// Returns the x channel of the depth texture at uv (mip 0).
float GetUnpackDepthBuffer(const vec2 uv) {
#if (ENABLE_INPUT_ATTACHMENTS == 1)
    return subpassLoad(uDepth).x;
#else
    return textureLod(sampler2D(uDepth, uSampler), uv, 0).x;
#endif
}

// Returns the xy channels of the velocity texture at uv (mip 0), scaled by invSize.
vec2 GetUnpackVelocity(const vec2 uv, const vec2 invSize) {
#if (ENABLE_INPUT_ATTACHMENTS == 1)
    return subpassLoad(uVelocity).xy;
#else
    return textureLod(sampler2D(uVelocity, uSampler), uv, 0).xy * invSize;
#endif
}

// Lifting-style RGB -> (Y, Co, Cg) transform; exact inverse is YCoCgToRGB.
vec3 RGBToYCoCg(const vec3 rgb) {
    const float co = rgb.r - rgb.b;
    const float tmp = rgb.b + co / 2.0;
    const float cg = rgb.g - tmp;
    const float y = tmp + cg / 2.0;
    return vec3(y, co, cg);
}

// Inverse of RGBToYCoCg; input is (Y, Co, Cg) stored in (r, g, b).
vec3 YCoCgToRGB(const vec3 ycocg) {
    const float tmp = ycocg.r - ycocg.b / 2.0;
    const float g = ycocg.b + tmp;
    const float b = tmp - ycocg.g / 2.0;
    const float r = ycocg.g + b;
    return vec3(r, g, b);
}

// Returns the velocity used for reprojection, already multiplied by uPc.viewportSizeInvSize.zw.
//
// For QUALITY_MED and above the velocity is read at the neighbour that has the smallest
// depth value in the sampled neighbourhood (presumably the closest surface; this depends on
// the depth convention used by the renderer). For QUALITY_LOW it is read at inUv.
//
// isEdge is set to true when the centre depth differs from the neighbourhood average by more
// than DEPTH_DIFF. It is always written; for QUALITY_LOW no depth is sampled and it is false.
vec2 GetVelocity(out bool isEdge) {
    const uint quality = uint(uPc.factor.y + 0.5);
    vec2 velUv = inUv;

    // an out parameter is undefined until written, and the low quality path samples no depth
    isEdge = false;

    if (quality == QUALITY_MED) {
        // sample 5 values in an X pattern (the four diagonal neighbours and the centre)

        const int offsetCount = 5;
        const ivec2 offsets[offsetCount] = {
            ivec2(-1, -1),
            ivec2(1, -1),
            ivec2(0, 0),
            ivec2(-1, 1),
            ivec2(1, 1),
        };

        float depths[offsetCount];

        // textureGatherOffsets returns one texel per offset, in offset order
        const ivec2 accessOffsets[4] = {
            ivec2(-1, -1),
            ivec2(1, -1),
            ivec2(0, 0),
            ivec2(-1, 1)
        };
        const vec4 depth0123 = textureGatherOffsets(sampler2D(uDepth, uSampler), inUv.xy, accessOffsets, 0);

        depths[0] = depth0123.x;
        depths[1] = depth0123.y;
        depths[2] = depth0123.z;
        depths[3] = depth0123.w;
        depths[4] = textureLodOffset(sampler2D(uDepth, uSampler), inUv.xy, 0.0, offsets[4]).x;

        // offsets[2] is the centre texel
        const float currentDepth = depths[2];

        float minDepth = depths[0];
        float avgDepth = depths[0];
        int minDepthIndex = 0;
        for (int ii = 1; ii < offsetCount; ++ii) {
            if (depths[ii] < minDepth) {
                minDepth = depths[ii];
                minDepthIndex = ii;
            }

            avgDepth += depths[ii];
        }

        const ivec2 offset = offsets[minDepthIndex];
        velUv += offset * (uPc.viewportSizeInvSize.zw);

        avgDepth /= float(offsetCount);
        isEdge = abs(currentDepth - avgDepth) > DEPTH_DIFF;
    }
    else if (quality >= QUALITY_HIGH) {
        // sample a full 3x3 grid

        const int offsetCount = 9;
        const ivec2 offsets[offsetCount] = {
            // the first gather square (top-left 2x2, includes the centre)
            ivec2(-1, -1),
            ivec2(-1, 0),
            ivec2(0, -1),
            ivec2(0, 0),

            // the second gather square (bottom-right 2x2, centre not stored again)
            // ivec2(0, 0),
            ivec2(0, 1),
            ivec2(1, 0),
            ivec2(1, 1),

            // the remaining corners
            ivec2(1, -1),
            ivec2(-1, 1)
        };

        float depths[offsetCount];

        // textureGather returns the 2x2 bilinear footprint around the coordinate as
        // (x, y, z, w) = (i0j1, i1j1, i1j0, i0j0). Sampling at the corner shared by the four
        // wanted texels (half a texel away from the pixel centre) selects that footprint
        // unambiguously. Assumes uPc.viewportSizeInvSize.zw is the texel size of uDepth, the
        // same assumption the velUv offset below already makes.
        const vec2 halfTexel = 0.5 * uPc.viewportSizeInvSize.zw;
        const vec4 depthTopLeft = textureGather(sampler2D(uDepth, uSampler), inUv.xy - halfTexel, 0);
        const vec4 depthBottomRight = textureGather(sampler2D(uDepth, uSampler), inUv.xy + halfTexel, 0);

        // depths[n] must hold the texel at offsets[n]
        depths[0] = depthTopLeft.w; // (-1, -1)
        depths[1] = depthTopLeft.x; // (-1,  0)
        depths[2] = depthTopLeft.z; // ( 0, -1)
        depths[3] = depthTopLeft.y; // ( 0,  0)

        depths[4] = depthBottomRight.x; // (0, 1)
        depths[5] = depthBottomRight.z; // (1, 0)
        depths[6] = depthBottomRight.y; // (1, 1)

        depths[7] = textureLodOffset(sampler2D(uDepth, uSampler), inUv.xy, 0.0, offsets[7]).x;
        depths[8] = textureLodOffset(sampler2D(uDepth, uSampler), inUv.xy, 0.0, offsets[8]).x;

        // offsets[3] is the centre texel
        const float currentDepth = depths[3];

        float minDepth = depths[0];
        float avgDepth = depths[0];
        int minDepthIndex = 0;
        for (int ii = 1; ii < offsetCount; ++ii) {
            if (depths[ii] < minDepth) {
                minDepth = depths[ii];
                minDepthIndex = ii;
            }

            avgDepth += depths[ii];
        }

        const ivec2 offset = offsets[minDepthIndex];
        velUv += offset * (uPc.viewportSizeInvSize.zw);

        avgDepth /= float(offsetCount);
        isEdge = abs(currentDepth - avgDepth) > DEPTH_DIFF;
    }
    // multiply velocity to correct uv offsets
    return textureLod(sampler2D(uVelocity, uSampler), velUv, 0).xy * uPc.viewportSizeInvSize.zw;
}

// clip towards aabb center
// e.g. "temporal reprojection anti-aliasing in inside"
//
// Returns history unchanged when it lies inside [aabbMin, aabbMax]; otherwise returns the
// point where the segment from the box centre to history crosses the box surface.
// The w component of the box centre is taken from color.w.
vec4 ClipAabb(const vec3 aabbMin, const vec3 aabbMax, const vec4 color, const vec4 history) {
    const vec3 pClip = 0.5 * (aabbMax + aabbMin);
    const vec3 eClip = 0.5 * (aabbMax - aabbMin) + CLIP_AABB_EPSILON;

    const vec4 vClip = history - vec4(pClip, color.w);
    // offset from the centre measured in box half-extents
    const vec3 vUnit = vClip.xyz / eClip;
    const vec3 aUnit = abs(vUnit);
    const float maUnit = max(aUnit.x, max(aUnit.y, aUnit.z));
    // if maUnit <= 1.0 the point is inside the aabb
    const vec4 res = (maUnit > 1.0) ? (vec4(pClip, color.w) + vClip / maUnit) : history;
    return res;
}

// clip the color to be inside a box defined by the center (mean) and size (variance or stdev)
//
// history is returned unchanged when it is already inside the box. Otherwise it is moved
// along the direction towards currColor by the smallest non-negative per-axis distance to the
// box plane facing history. If no such distance exists, history is returned unchanged.
vec3 VarianceClipAABB(const vec3 history, const vec3 currColor, const vec3 center, const vec3 size) {
    if (all(lessThanEqual(abs(history - center), size))) {
        return history;
    }

    const vec3 dir = currColor - history;
    const vec3 near = center - sign(dir) * size;
    // components where dir is zero produce non-finite values which fail the checks below
    const vec3 tAll = (near - history) / dir;

    // just some sufficiently large value
    float t = 1e20;

    for (int ii = 0; ii < 3; ii++) {
        if (tAll[ii] >= 0.0 && tAll[ii] < t) {
            t = tAll[ii];
        }
    }

    if (t >= 1e20) {
        return history;
    }

    return history + dir * t;
}

// mean-variance clip history color to acceptable values within the current frame color
//
// The neighbourhood is a 5 sample plus-shaped cross for quality <= QUALITY_MED and the full
// 3x3 grid otherwise. When useyCoCG is set, the statistics and the clip are evaluated in YCoCg
// and the result is converted back to RGB. The returned alpha is always 1.0.
vec4 CalcVarianceClippedHistoryColor(const vec3 history, const vec3 currColor, const bool useyCoCG) {
    const uint quality = uint(uPc.factor.y + 0.5);

    vec3 colors[9];
    int numSamples = 0;

    if (quality <= QUALITY_MED) {
        // sample only in a cross pattern
        numSamples = 5;

        colors[0] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, -1)).rgb;
        colors[1] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 0)).rgb;
        colors[2] = currColor;
        colors[3] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 0)).rgb;
        colors[4] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, 1)).rgb;
    } else {
        // sample all 9 values within the 3x3 grid
        numSamples = 9;

        colors[0] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, -1)).rgb;
        colors[1] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, -1)).rgb;
        colors[2] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, -1)).rgb;
        colors[3] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 0)).rgb;
        colors[4] = currColor;
        colors[5] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 0)).rgb;
        colors[6] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 1)).rgb;
        colors[7] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, 1)).rgb;
        colors[8] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 1)).rgb;
    }

    vec3 sum = vec3(0);
    vec3 sumSq = vec3(0);

    for (int ii = 0; ii < numSamples; ii++) {
        vec3 value = colors[ii];
        if (useyCoCG) {
            value = RGBToYCoCg(value);
        }
        sum += value;
        sumSq += value * value;
    }

    const vec3 mean = sum / float(numSamples);
    // E[x^2] - E[x]^2 can become slightly negative through rounding when the neighbourhood is
    // (nearly) uniform; clamp so that sqrt never produces a NaN that would stick in the history
    const vec3 variance = sqrt(max((sumSq / float(numSamples)) - mean * mean, vec3(0.0)));

    vec3 clampedHistoryColor;
    if (useyCoCG) {
        clampedHistoryColor = YCoCgToRGB(VarianceClipAABB(RGBToYCoCg(history), RGBToYCoCg(currColor), mean, variance));
    } else {
        clampedHistoryColor = VarianceClipAABB(history, currColor, mean, variance);
    }

    return vec4(clampedHistoryColor, 1.0);
}

// Clamps history to a min/max box built from the 3x3 neighbourhood of the current frame.
// The box is the average of the plus-shaped (5 sample) box and the full 3x3 box.
// The returned alpha is always 1.0.
vec4 CalcMinMaxClippedHistoryColor(const vec3 history, const vec3 currColor) {
    // sample 3x3 grid
    // 0 1 2
    // 3 4 5
    // 6 7 8

    // Box filter for history
    // diamond shape
    vec3 bc1 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, -1)).rgb;
    vec3 bc3 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 0)).rgb;
    // center sample
    vec3 bc4 = currColor;
    vec3 min13 = min(min(bc1, bc3), bc4);
    vec3 max13 = max(max(bc1, bc3), bc4);

    vec3 bc5 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 0)).rgb;
    vec3 bc7 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, 1)).rgb;
    vec3 min57 = min(bc5, bc7);
    vec3 max57 = max(bc5, bc7);
    vec3 boxMin = min(min13, min57);
    vec3 boxMax = max(max13, max57);

    // extend the diamond box with the two upper corners
    vec3 bc0 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, -1)).rgb;
    vec3 bc2 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, -1)).rgb;
    vec3 min02 = min(boxMin, min(bc0, bc2));
    vec3 max02 = max(boxMax, max(bc0, bc2));

    vec3 bc6 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 1)).rgb;
    vec3 bc8 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 1)).rgb;
    vec3 min68 = min(bc6, bc8);
    vec3 max68 = max(bc6, bc8);

    // corners
    const vec3 boxMinCorner = min(min02, min68);
    const vec3 boxMaxCorner = max(max02, max68);

    // average the diamond box and the full 3x3 box
    boxMin = (boxMin + boxMinCorner) * 0.5;
    boxMax = (boxMax + boxMaxCorner) * 0.5;
    return vec4(clamp(history, boxMin, boxMax), 1.0);
}

// Returns four cubic filter weights for the fractional position value; the weights sum to 1.
// NOTE(review): the polynomial looks like the uniform cubic B-spline basis - confirm.
vec4 cubic(const float value) {
    const vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - value;
    const vec4 s = n * n * n;
    const float x = s.x;
    const float y = s.y - 4.0 * s.x;
    const float z = s.z - 4.0 * s.y + 6.0 * s.x;
    const float w = 6.0 - x - y - z;
    return vec4(x, y, z, w) * (1.0 / 6.0);
}

// Samples the history buffer at historyUv and restricts the result to the colour
// neighbourhood of the current frame.
//
// bicubic             - use a 4 tap bicubic reconstruction instead of a single fetch
// useVarianceClipping - use mean/variance clipping, otherwise min/max clamping
// useyCoCG            - forwarded to the variance clipping
// The returned alpha is 1.0 (set by both clipping functions).
vec4 GetHistory(const vec2 historyUv, const vec4 currColor, const bool bicubic,
    const bool useVarianceClipping, const bool useyCoCG) {
    vec4 history;

    if (bicubic) {
        // presumably viewportSizeInvSize.xy is the size in texels and .zw its inverse
        vec2 texCoords = historyUv * uPc.viewportSizeInvSize.xy - 0.5;

        const vec2 fxy = fract(texCoords);
        texCoords -= fxy;

        const vec4 xcubic = cubic(fxy.x);
        const vec4 ycubic = cubic(fxy.y);

        // collapse the 4x4 cubic footprint into 4 fetches whose positions are weighted
        // between texel pairs
        const vec4 c = texCoords.xxyy + vec2(-0.5, 1.5).xyxy;
        const vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw);
        const vec4 offset = (c + vec4(xcubic.yw, ycubic.yw) / s) * uPc.viewportSizeInvSize.zw.xxyy;

        const vec4 aa = textureLod(sampler2D(uHistory, uSampler), offset.xz, 0);
        const vec4 bb = textureLod(sampler2D(uHistory, uSampler), offset.yz, 0);
        const vec4 cc = textureLod(sampler2D(uHistory, uSampler), offset.xw, 0);
        const vec4 dd = textureLod(sampler2D(uHistory, uSampler), offset.yw, 0);

        const float sx = s.x / (s.x + s.y);
        const float sy = s.z / (s.z + s.w);

        history = mix(mix(dd, cc, sx), mix(bb, aa, sx), sy);
    } else {
        history = textureLod(sampler2D(uHistory, uSampler), historyUv, 0.0);
    }

    if (useVarianceClipping) {
        history = CalcVarianceClippedHistoryColor(history.rgb, currColor.rgb, useyCoCG);
    } else {
        history = CalcMinMaxClippedHistoryColor(history.rgb, currColor.rgb);
    }

    return history;
}

// Decodes the feature bits that are packed into the bit pattern of uPc.factor.z.
void UnpackFeatureToggles(out bool useBicubic, out bool useVarianceClipping, out bool useyCoCG, out bool ignoreEdges) {
    // reflected in LumeRender/src/datastore/render_data_store_post_process.h:GetFactorTaa()
    const uint combined = floatBitsToUint(uPc.factor.z);

    useBicubic = (combined & (1u << TAA_USE_BICUBIC_BIT)) != 0u;
    useVarianceClipping = (combined & (1u << TAA_USE_VARIANCE_CLIPPING_BIT)) != 0u;
    useyCoCG = (combined & (1u << TAA_USE_YCOCG_BIT)) != 0u;
    ignoreEdges = (combined & (1u << TAA_IGNORE_EDGES_BIT)) != 0u;
}

// Reprojects the history with the per-pixel velocity, clips it against the current
// neighbourhood and blends it with the current colour in a luma-weighted space.
void main(void) {
    bool isEdge;
    const vec2 velocity = GetVelocity(isEdge);
    const vec2 historyUv = inUv.xy - velocity;

    bool useBicubic;
    bool useVarianceClipping;
    bool useyCoCG;
    bool ignoreEdges;
    UnpackFeatureToggles(useBicubic, useVarianceClipping, useyCoCG, ignoreEdges);

    /**
     * This is a bit of a hack since variance clipping causes flickering
     * without specific mitigations. Some ideas are discussed here, but
     * just disabling variance clipping for stationary elements is enough
     * to remove the flickering without affecting the results otherwise.
     * https://advances.realtimerendering.com/s2014/epic/TemporalAA.pptx
     */
    if (length(velocity) <= STATIONARY_VELOCITY_THRESHOLD) {
        useVarianceClipping = false;
    }

    // Bicubic filtering makes edges look blurry; disable if an edge.
    if (isEdge && ignoreEdges) {
        useBicubic = false;
    }

    vec4 currColor = textureLod(sampler2D(uColor, uSampler), inUv.xy, 0.0);
    vec4 history = GetHistory(historyUv, currColor, useBicubic, useVarianceClipping, useyCoCG);

    // NOTE: add filtered option for less blurred history

    const float blendWeight = uPc.factor.a;

    // luma based tonemapping as suggested by Karis
    const float historyLuma = CalcLuma(history.rgb);
    const float colorLuma = CalcLuma(currColor.rgb);

    history.rgb *= 1.0 / (1.0 + historyLuma);
    currColor.rgb *= 1.0 / (1.0 + colorLuma);

    vec4 color = mix(history, currColor, blendWeight);

    // inverse tonemap
    color.rgb *= 1.0 / (1.0 - CalcLuma(color.rgb));

    // safety for removing negative values
    outColor = max(color, vec4(0.0));
}