//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// GenerateMipmap.comp: Generate mipmap of texture in a single pass.  Uses AMD's FFX SPD located in
// third_party/ffx_spd/.
//
// Note that due to bugs in that code, we only support downsampling 6 levels at a time (instead of
// the 12 mips supported by FFX SPD).  The issue is that FFX SPD tries to `imageLoad` from `dst[5]`
// with coordinates that can potentially be outside the level extents.  This results in transparent
// black reads.  A possible solution is to clamp the coordinates in `SpdLoad`.  However, we opted to
// support only 6 levels at a time because:
//
// - On most Android vendors, which is our primary optimization target,
//   maxPerStageDescriptorStorageImages is commonly 4, which means we cannot generate mipmaps for
//   even 6 levels at a time anyway.
// - By removing support for >6 mips, we can remove the atomic counter logic required by FFX SPD to
//   single out an invocation which will be downsampling the rest of the 6 mips.  This makes the
//   generation of the first 6 mips faster.
#version 450 core

#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require

// Image format qualifier of the destination levels, chosen by the shader variation flags.
#if IsRGBA8 || IsRGBA8_UseHalf
#define DST_FORMAT rgba8
#elif IsRGBA16 || IsRGBA16_UseHalf
#define DST_FORMAT rgba16
#elif IsRGBA32F
#define DST_FORMAT rgba32f
#else
#error "Not all formats are accounted for"
#endif

// Number of destination images bound to the shader, chosen by the shader variation flags.
#if DestSize4
#define DST_COUNT 4
#elif DestSize6
#define DST_COUNT 6
#else
#error "Not all destination sizes are accounted for"
#endif

// TODO: Support sRGB
// TODO: Support non-float formats
// TODO: Support subgroup mode

layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// dst[i] is written through SpdStore*() with the mip index supplied by FFX SPD.
layout(set = 0, binding = 0, DST_FORMAT) uniform coherent image2D dst[DST_COUNT];
// Source level, read through the sampler in SpdLoadSourceImage*().
layout(set = 0, binding = 1) uniform sampler2D src;

layout(push_constant) uniform PushConstants {
    // Inverse extents of src image for uv calculation.
    vec2 invSrcExtent;
    // Number of levels to generate mipmaps for.
    uint levelCount;
} params;

// Configuration consumed by ffx_a.h.
#define A_GPU
#define A_GLSL

// The *_UseHalf variations of the 8- and 16-bit-per-channel formats run the half-precision path.
#if IsRGBA8_UseHalf || IsRGBA16_UseHalf
#define A_HALF
#endif

#include "third_party/ffx_spd/ffx_a.h"

// Element type of the shared scratch array: half vectors on the A_HALF path, float otherwise.
#if defined(A_HALF)
#define INTERMEDIATE_TYPE AH4
#else
#define INTERMEDIATE_TYPE AF4
#endif

// Shared memory
shared INTERMEDIATE_TYPE spd_intermediate[16][16];
// Nothing in this file touches the counter; the atomic counter macros below expand to no-ops.
shared AU1 spd_counter;

// Sample mip 0 through a linear sampler rather than issuing multiple loads and averaging by hand.
#define SPD_LINEAR_SAMPLER

// Callbacks required by ffx_spd.h

#if defined(A_HALF)

#define SPD_PACKED_ONLY

// Samples the source image at the normalized coordinate derived from `p` and narrows to half.
AH4 SpdLoadSourceImageH(ASU2 p)
{
    AF2 uv    = p * params.invSrcExtent + params.invSrcExtent;
    AF4 texel = texture(src, uv);
    return AH4(texel);
}

// SpdLoadH() takes a 32-bit signed integer 2D coordinate and loads color.
// In FFX SPD this reads back the 5th mip level, where each value was produced by a different
// thread group, so that the last thread group can compute the subsequent mips.
//
// Unused as we don't support more than 6 levels; returns zero.
AH4 SpdLoadH(ASU2 p)
{
    return AH4(0);
}

// Writes `value`, widened to float, to destination level `mip` at texel `p`.
void SpdStoreH(ASU2 p, AH4 value, AU1 mip)
{
    AF4 texel = AF4(value);
    imageStore(dst[mip], p, texel);
}

// Shared memory (LDS) accessors
AH4 SpdLoadIntermediateH(AU1 x, AU1 y)
{
    return spd_intermediate[x][y];
}
void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value)
{
    spd_intermediate[x][y] = value;
}

// Reduction: collapses the four values of a 2x2 footprint into their average.
AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3)
{
    AH4 sum = v0 + v1 + v2 + v3;
    return sum * AH1(0.25);
}

#else  // A_HALF

// Samples the source image at the normalized coordinate derived from `p`.
AF4 SpdLoadSourceImage(ASU2 p)
{
    AF2 uv    = p * params.invSrcExtent + params.invSrcExtent;
    AF4 texel = texture(src, uv);
    return texel;
}

// SpdLoad() takes a 32-bit signed integer 2D coordinate and loads color.
// In FFX SPD this reads back the 5th mip level, where each value was produced by a different
// thread group, so that the last thread group can compute the subsequent mips.
//
// Unused as we don't support more than 6 levels; returns zero.
AF4 SpdLoad(ASU2 p)
{
    return AF4(0);
}

// Writes `value` to destination level `mip` at texel `p`.
void SpdStore(ASU2 p, AF4 value, AU1 mip)
{
    imageStore(dst[mip], p, value);
}

// Shared memory (LDS) accessors
AF4 SpdLoadIntermediate(AU1 x, AU1 y)
{
    return spd_intermediate[x][y];
}
void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value)
{
    spd_intermediate[x][y] = value;
}

// Reduction: collapses the four values of a 2x2 footprint into their average.
AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
{
    AF4 sum = v0 + v1 + v2 + v3;
    return sum * 0.25;
}
#endif  // A_HALF

// Atomic counter hooks.  We don't support more than 6 mips, so these are unused.  The value
// returned is arbitrary as SpdDownsample will early out before looking at it.
#define SpdIncreaseAtomicCounter()
#define SpdGetAtomicCounter() 0

#include "third_party/ffx_spd/ffx_spd.h"

// Entry point: hands this invocation's work group id, local index and the requested level count
// to the FFX SPD downsampler matching the selected precision.
void main()
{
    AU2 groupId    = gl_WorkGroupID.xy;
    AU1 localIndex = gl_LocalInvocationIndex;
    AU1 mipCount   = params.levelCount;

#if defined(A_HALF)
    SpdDownsampleH(groupId, localIndex, mipCount, 0);
#else
    SpdDownsample(groupId, localIndex, mipCount, 0);
#endif
}