#version 450 core
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable

// Temporal anti-aliasing resolve pass.
// Reprojects the history buffer with a (depth-dilated) velocity, clips the
// history colour against the current frame neighbourhood and blends the two.

// includes

#include "render/shaders/common/render_color_conversion_common.h"
#include "render/shaders/common/render_post_process_common.h"
#include "render/shaders/common/render_tonemap_common.h"

// sets

#include "render/shaders/common/render_post_process_layout_common.h"

layout(set = 1, binding = 0) uniform texture2D uDepth;
layout(set = 1, binding = 1) uniform texture2D uColor;
layout(set = 1, binding = 2) uniform texture2D uVelocity;
layout(set = 1, binding = 3) uniform texture2D uHistory;
layout(set = 1, binding = 4) uniform sampler uSampler;

// in / out

layout(location = 0) in vec2 inUv;

layout(location = 0) out vec4 outColor;

// NOTE: cannot be used (remove if not used for any input)
#define ENABLE_INPUT_ATTACHMENTS 0

#define QUALITY_LOW 0
#define QUALITY_MED 1
#define QUALITY_HIGH 2

// if the magnitude of the velocity vector is greater than
// the threshold, the pixel is considered to be in motion.
#define STATIONARY_VELOCITY_THRESHOLD 0.001
// used to detect if the current pixel is an edge
#define DEPTH_DIFF 0.0005
// smallest denominator allowed in guarded divisions
#define TAA_SAFE_EPSILON 1e-6

// Returns the depth stored at uv (first channel of uDepth, mip 0).
float GetUnpackDepthBuffer(const vec2 uv) {
#if (ENABLE_INPUT_ATTACHMENTS == 1)
    return subpassLoad(uDepth).x;
#else
    return textureLod(sampler2D(uDepth, uSampler), uv, 0).x;
#endif
}

// Returns the velocity stored at uv, scaled by invSize so that it can be
// used directly as a uv offset.
vec2 GetUnpackVelocity(const vec2 uv, const vec2 invSize) {
#if (ENABLE_INPUT_ATTACHMENTS == 1)
    return subpassLoad(uVelocity).xy;
#else
    return textureLod(sampler2D(uVelocity, uSampler), uv, 0).xy * invSize;
#endif
}

// Reversible RGB -> YCoCg transform (lifting form), returned as (Y, Co, Cg).
vec3 RGBToYCoCg(const vec3 rgb) {
    const float co = rgb.r - rgb.b;
    const float tmp = rgb.b + co / 2.0;
    const float cg = rgb.g - tmp;
    const float y = tmp + cg / 2.0;
    return vec3(y, co, cg);
}

// Inverse of RGBToYCoCg; input is (Y, Co, Cg).
vec3 YCoCgToRGB(const vec3 ycocg) {
    const float tmp = ycocg.r - ycocg.b / 2.0;
    const float g = ycocg.b + tmp;
    const float b = tmp - ycocg.g / 2.0;
    const float r = ycocg.g + b;
    return vec3(r, g, b);
}

// Quality level (QUALITY_LOW / QUALITY_MED / QUALITY_HIGH) read from the
// push constant; stored as a float and rounded to the nearest integer.
uint GetTaaQuality() {
    return uint(uPc.factor.y + 0.5);
}

// Returns the velocity (in uv units) used to reproject the current pixel.
//
// For QUALITY_MED and above the velocity is "dilated": it is read from the
// neighbour with the smallest depth value instead of the pixel itself.
// isEdge is set when the centre depth differs from the neighbourhood average
// by more than DEPTH_DIFF. For QUALITY_LOW no depth is sampled and isEdge is
// always false.
vec2 GetVelocity(out bool isEdge) {
    const uint quality = GetTaaQuality();
    const vec2 invSize = uPc.viewportSizeInvSize.zw;
    vec2 velUv = inUv;

    // out parameters are undefined until written; the low quality path
    // does not do edge detection, so give it a defined value up front.
    isEdge = false;

    if (quality == QUALITY_MED) {
        // sample 5 values: the centre and the four diagonal neighbours

        const uint offsetCount = 5;
        const ivec2 offsets[offsetCount] = {
            ivec2(-1, -1),
            ivec2(1, -1),
            ivec2(0, 0),
            ivec2(-1, 1),
            ivec2(1, 1),
        };

        float depths[offsetCount];

        // the first four offsets are fetched with a single gather,
        // result components x/y/z/w correspond to accessOffsets[0..3]
        const ivec2 accessOffsets[4] = {
            ivec2(-1, -1),
            ivec2(1, -1),
            ivec2(0, 0),
            ivec2(-1, 1)
        };
        const vec4 depth0123 = textureGatherOffsets(sampler2D(uDepth, uSampler), inUv.xy, accessOffsets, 0);

        depths[0] = depth0123.x;
        depths[1] = depth0123.y;
        depths[2] = depth0123.z;
        depths[3] = depth0123.w;
        depths[4] = textureLodOffset(sampler2D(uDepth, uSampler), inUv.xy, 0.0, offsets[4]).x;

        // offsets[2] is the centre texel
        const float currentDepth = depths[2];

        float minDepth = depths[0];
        float avgDepth = depths[0];
        uint minDepthIndex = 0u;
        for (uint ii = 1u; ii < offsetCount; ++ii) {
            if (depths[ii] < minDepth) {
                minDepth = depths[ii];
                minDepthIndex = ii;
            }

            avgDepth += depths[ii];
        }

        velUv += vec2(offsets[minDepthIndex]) * invSize;

        avgDepth /= float(offsetCount);
        isEdge = abs(currentDepth - avgDepth) > DEPTH_DIFF;
    } else if (quality >= QUALITY_HIGH) {
        // sample a full 3x3 grid

        // The table is ordered to match the component order returned by
        // textureGather: x = (i0, j1), y = (i1, j1), z = (i1, j0), w = (i0, j0).
        const uint offsetCount = 9;
        const ivec2 offsets[offsetCount] = {
            // the first gather square, texels (-1, -1) .. (0, 0)
            ivec2(-1, 0),
            ivec2(0, 0),
            ivec2(0, -1),
            ivec2(-1, -1),

            // the second gather square, texels (0, 0) .. (1, 1)
            ivec2(0, 1),
            ivec2(1, 1),
            ivec2(1, 0),
            // ivec2(0, 0), already fetched by the first gather

            // the remaining corners
            ivec2(1, -1),
            ivec2(-1, 1)
        };

        float depths[offsetCount];

        // Gather at the corner shared by the four wanted texels (half a texel
        // away from the pixel centre) so that the 2x2 footprint selection
        // does not depend on rounding at an exact texel centre.
        const vec2 halfTexel = 0.5 * invSize;
        const vec4 depth0123 = textureGather(sampler2D(uDepth, uSampler), inUv.xy - halfTexel, 0);
        const vec4 depth456 = textureGather(sampler2D(uDepth, uSampler), inUv.xy + halfTexel, 0);

        depths[0] = depth0123.x;
        depths[1] = depth0123.y;
        depths[2] = depth0123.z;
        depths[3] = depth0123.w;

        depths[4] = depth456.x;
        depths[5] = depth456.y;
        depths[6] = depth456.z;

        depths[7] = textureLodOffset(sampler2D(uDepth, uSampler), inUv.xy, 0.0, offsets[7]).x;
        depths[8] = textureLodOffset(sampler2D(uDepth, uSampler), inUv.xy, 0.0, offsets[8]).x;

        // offsets[1] is the centre texel
        const float currentDepth = depths[1];

        float minDepth = depths[0];
        float avgDepth = depths[0];
        uint minDepthIndex = 0u;
        for (uint ii = 1u; ii < offsetCount; ++ii) {
            if (depths[ii] < minDepth) {
                minDepth = depths[ii];
                minDepthIndex = ii;
            }

            avgDepth += depths[ii];
        }

        velUv += vec2(offsets[minDepthIndex]) * invSize;

        avgDepth /= float(offsetCount);
        isEdge = abs(currentDepth - avgDepth) > DEPTH_DIFF;
    }

    // multiply velocity to correct uv offsets
    return textureLod(sampler2D(uVelocity, uSampler), velUv, 0).xy * invSize;
}

// clip towards aabb center
// e.g. "temporal reprojection anti-aliasing in INSIDE"
//
// Returns history unchanged when it lies inside [aabbMin, aabbMax], otherwise
// the point where the segment from the box centre to history leaves the box.
// The w component of the box centre is taken from color.w.
vec4 ClipAabb(const vec3 aabbMin, const vec3 aabbMax, const vec4 color, const vec4 history) {
    const vec3 pClip = 0.5 * (aabbMax + aabbMin);
    const vec3 eClip = 0.5 * (aabbMax - aabbMin);

    const vec4 vClip = history - vec4(pClip, color.w);
    // distance from the centre in units of the box half extent
    // (guarded, the box is degenerate when all neighbours are equal)
    const vec3 vUnit = vClip.xyz / max(eClip, vec3(TAA_SAFE_EPSILON));
    const vec3 aUnit = abs(vUnit);
    const float maUnit = max(aUnit.x, max(aUnit.y, aUnit.z));
    // if maUnit <= 1.0 the point is inside the aabb
    if (maUnit > 1.0) {
        return vec4(pClip, color.w) + vClip / maUnit;
    }
    return history;
}

// clip the color to be inside a box defined by the center (mean) and size (variance or stdev)
//
// history inside the box is returned as is. Otherwise history is moved along
// the direction towards currColor by the smallest non-negative distance at
// which it reaches one of the near planes of the box. If no such distance
// exists history is returned unchanged.
vec3 VarianceClipAABB(const vec3 history, const vec3 currColor, const vec3 center, const vec3 size) {
    if (all(lessThanEqual(abs(history - center), size))) {
        return history;
    }

    const vec3 dir = currColor - history;
    const vec3 near = center - sign(dir) * size;
    // components with dir == 0 produce inf / NaN here, both fail the
    // comparisons below and are therefore ignored
    const vec3 tAll = (near - history) / dir;

    // just some sufficiently large value
    float t = 1e20;

    for (int ii = 0; ii < 3; ii++) {
        if (tAll[ii] >= 0.0 && tAll[ii] < t) {
            t = tAll[ii];
        }
    }

    if (t >= 1e20) {
        return history;
    }

    return history + dir * t;
}

// mean-variance clip history color to acceptable values within the current frame color
//
// The neighbourhood is 5 samples (plus shape) for QUALITY_LOW / QUALITY_MED
// and the full 3x3 grid otherwise. When useyCoCG is set the statistics and
// the clipping are done in YCoCg and the result is converted back to RGB.
// The returned alpha is always 1.0.
vec4 CalcVarianceClippedHistoryColor(const vec3 history, const vec3 currColor, const bool useyCoCG) {
    const uint quality = GetTaaQuality();

    vec3 colors[9];
    uint numSamples = 0u;

    if (quality <= QUALITY_MED) {
        // sample only in a cross pattern
        numSamples = 5u;

        colors[0] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, -1)).rgb;
        colors[1] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 0)).rgb;
        colors[2] = currColor;
        colors[3] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 0)).rgb;
        colors[4] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, 1)).rgb;
    } else {
        // sample all 9 values within the 3x3 grid
        numSamples = 9u;

        colors[0] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, -1)).rgb;
        colors[1] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, -1)).rgb;
        colors[2] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, -1)).rgb;
        colors[3] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 0)).rgb;
        colors[4] = currColor;
        colors[5] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 0)).rgb;
        colors[6] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 1)).rgb;
        colors[7] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, 1)).rgb;
        colors[8] = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 1)).rgb;
    }

    vec3 sum = vec3(0);
    vec3 sumSq = vec3(0);

    for (uint ii = 0u; ii < numSamples; ii++) {
        vec3 value = colors[ii];
        if (useyCoCG) {
            value = RGBToYCoCg(value);
        }
        sum += value;
        sumSq += value * value;
    }

    const vec3 mean = sum / float(numSamples);
    // E[x^2] - E[x]^2 can become slightly negative because of rounding when
    // the neighbourhood is (nearly) uniform. sqrt of a negative value is
    // undefined and a NaN written here would stay in the history buffer.
    const vec3 stdev = sqrt(max(vec3(0.0), (sumSq / float(numSamples)) - mean * mean));

    vec3 clampedHistoryColor;
    if (useyCoCG) {
        clampedHistoryColor = YCoCgToRGB(VarianceClipAABB(RGBToYCoCg(history), RGBToYCoCg(currColor), mean, stdev));
    } else {
        clampedHistoryColor = VarianceClipAABB(history, currColor, mean, stdev);
    }

    return vec4(clampedHistoryColor, 1.0);
}

// Clamps history to the min / max of the current frame neighbourhood.
// The box is the average of the plus shaped (5 sample) box and the full 3x3
// box, which rounds the corners of the neighbourhood. The returned alpha is
// always 1.0.
vec4 CalcMinMaxClippedHistoryColor(const vec3 history, const vec3 currColor) {
    // sample 3x3 grid
    // 0 1 2
    // 3 4 5
    // 6 7 8

    // Box filter for history
    // diamond shape
    vec3 bc1 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, -1)).rgb;
    vec3 bc3 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 0)).rgb;
    // center sample
    vec3 bc4 = currColor;
    vec3 min13 = min(min(bc1, bc3), bc4);
    vec3 max13 = max(max(bc1, bc3), bc4);

    vec3 bc5 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 0)).rgb;
    vec3 bc7 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(0, 1)).rgb;
    vec3 min57 = min(bc5, bc7);
    vec3 max57 = max(bc5, bc7);
    vec3 boxMin = min(min13, min57);
    vec3 boxMax = max(max13, max57);

    // extend the diamond box with the four corner samples
    vec3 bc0 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, -1)).rgb;
    vec3 bc2 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, -1)).rgb;
    vec3 min02 = min(boxMin, min(bc0, bc2));
    vec3 max02 = max(boxMax, max(bc0, bc2));

    vec3 bc6 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(-1, 1)).rgb;
    vec3 bc8 = textureLodOffset(sampler2D(uColor, uSampler), inUv.xy, 0.0, ivec2(1, 1)).rgb;
    vec3 min68 = min(bc6, bc8);
    vec3 max68 = max(bc6, bc8);

    // corners
    const vec3 boxMinCorner = min(min02, min68);
    const vec3 boxMaxCorner = max(max02, max68);

    boxMin = (boxMin + boxMinCorner) * 0.5;
    boxMax = (boxMax + boxMaxCorner) * 0.5;
    return vec4(clamp(history, boxMin, boxMax), 1.0);
}

// Cubic filter weights for the four taps around a sample position;
// value is the fractional position between the two middle taps.
// The four weights sum to 1.
vec4 cubic(const float value) {
    const vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - value;
    const vec4 s = n * n * n;
    const float x = s.x;
    const float y = s.y - 4.0 * s.x;
    const float z = s.z - 4.0 * s.y + 6.0 * s.x;
    const float w = 6.0 - x - y - z;
    return vec4(x, y, z, w) * (1.0 / 6.0);
}

// Fetches the history colour at historyUv and clips it against the current
// frame neighbourhood.
//  bicubic              use the 4 fetch bicubic filter instead of a single fetch
//  useVarianceClipping  mean-variance clipping instead of min-max clamping
//  useyCoCG             do the variance clipping in YCoCg
// The alpha of the returned colour is 1.0 (set by the clipping functions).
vec4 GetHistory(const vec2 historyUv, const vec4 currColor, const bool bicubic,
    const bool useVarianceClipping, const bool useyCoCG) {
    vec4 history;

    if (bicubic) {
        // position in texels, relative to the texel centres
        vec2 texCoords = historyUv * uPc.viewportSizeInvSize.xy - 0.5;

        const vec2 fxy = fract(texCoords);
        texCoords -= fxy;

        const vec4 xcubic = cubic(fxy.x);
        const vec4 ycubic = cubic(fxy.y);

        // the 4x4 taps are folded into 2x2 fetches by placing each fetch
        // between two taps according to their weights
        // (NOTE: relies on uSampler doing linear filtering)
        const vec4 c = texCoords.xxyy + vec2(-0.5, 1.5).xyxy;
        const vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw);
        const vec4 offset = (c + vec4(xcubic.yw, ycubic.yw) / s) * uPc.viewportSizeInvSize.zw.xxyy;

        const vec4 aa = textureLod(sampler2D(uHistory, uSampler), offset.xz, 0);
        const vec4 bb = textureLod(sampler2D(uHistory, uSampler), offset.yz, 0);
        const vec4 cc = textureLod(sampler2D(uHistory, uSampler), offset.xw, 0);
        const vec4 dd = textureLod(sampler2D(uHistory, uSampler), offset.yw, 0);

        const float sx = s.x / (s.x + s.y);
        const float sy = s.z / (s.z + s.w);

        history = mix(mix(dd, cc, sx), mix(bb, aa, sx), sy);
    } else {
        history = textureLod(sampler2D(uHistory, uSampler), historyUv, 0.0);
    }

    if (useVarianceClipping) {
        history = CalcVarianceClippedHistoryColor(history.rgb, currColor.rgb, useyCoCG);
    } else {
        history = CalcMinMaxClippedHistoryColor(history.rgb, currColor.rgb);
    }

    return history;
}

// Decodes the feature flags that are packed as bits into uPc.factor.z.
void UnpackFeatureToggles(out bool useBicubic, out bool useVarianceClipping, out bool useyCoCG, out bool ignoreEdges) {
    // reflected in LumeRender/src/datastore/render_data_store_post_process.h:GetFactorTaa()
    const uint combined = floatBitsToUint(uPc.factor.z);

    useBicubic = (combined & (1u << TAA_USE_BICUBIC_BIT)) != 0u;
    useVarianceClipping = (combined & (1u << TAA_USE_VARIANCE_CLIPPING_BIT)) != 0u;
    useyCoCG = (combined & (1u << TAA_USE_YCOCG_BIT)) != 0u;
    ignoreEdges = (combined & (1u << TAA_IGNORE_EDGES_BIT)) != 0u;
}

void main(void) {
    bool isEdge;
    const vec2 velocity = GetVelocity(isEdge);
    const vec2 historyUv = inUv.xy - velocity;

    bool useBicubic;
    bool useVarianceClipping;
    bool useyCoCG;
    bool ignoreEdges;
    UnpackFeatureToggles(useBicubic, useVarianceClipping, useyCoCG, ignoreEdges);

    /**
     * This is a bit of a hack since variance clipping causes flickering
     * without specific mitigations. Some ideas are discussed here, but
     * just disabling variance clipping for stationary elements is enough
     * to remove the flickering without affecting the results otherwise.
     * https://advances.realtimerendering.com/s2014/epic/TemporalAA.pptx
     */
    if (length(velocity) <= STATIONARY_VELOCITY_THRESHOLD) {
        useVarianceClipping = false;
    }

    // Bicubic filtering makes edges look blurry; disable if an edge.
    if (isEdge && ignoreEdges) {
        useBicubic = false;
    }

    vec4 currColor = textureLod(sampler2D(uColor, uSampler), inUv.xy, 0.0);
    vec4 history = GetHistory(historyUv, currColor, useBicubic, useVarianceClipping, useyCoCG);

    // NOTE: add filtered option for less blurred history

    // weight of the current frame in the final blend
    const float blendWeight = uPc.factor.w;

    // luma based tonemapping as suggested by Karis
    const float historyLuma = CalcLuma(history.rgb);
    const float colorLuma = CalcLuma(currColor.rgb);

    history.rgb *= 1.0 / (1.0 + historyLuma);
    currColor.rgb *= 1.0 / (1.0 + colorLuma);

    vec4 color = mix(history, currColor, blendWeight);

    // inverse tonemap
    // (the denominator reaches zero when the tonemapped luma rounds to 1.0
    // for very bright input, keep it positive to avoid writing inf / NaN)
    color.rgb *= 1.0 / max(1.0 - CalcLuma(color.rgb), TAA_SAFE_EPSILON);

    // safety for removing negative values
    outColor = max(color, vec4(0.0));
}