• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//
2// Copyright 2020 The ANGLE Project Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
6// GenerateMipmap.comp: Generate mipmap of texture in a single pass.  Uses AMD's FFX SPD located in
7// third_party/ffx_spd/.
8//
9// Note that due to bugs in that code, we only support downsampling 6 levels at a time (instead of
10// the 12 mips supported by FFX SPD).  The issue is that FFX SPD tries to `imageLoad` from `dst[5]`
11// with coordinates that can potentially be outside the level extents.  This results in transparent
12// black reads.  A possible solution is to clamp the coordinates in `SpdLoad`.  However, we opted to
13// support only 6 levels at a time because:
14//
15// - On most Android vendors, which is our primary optimization target,
16//   maxPerStageDescriptorStorageImages is commonly 4, which means we cannot generate mipmaps for
17//   even 6 levels at a time anyway.
18// - By removing support for >6 mips, we can remove the atomic counter logic required by FFX SPD to
19//   single out an invocation which will be downsampling the rest of the 6 mips.  This makes the
20//   generation of the first 6 mips faster.
21
22#version 450 core
23
24#extension GL_GOOGLE_include_directive : require
25#extension GL_EXT_samplerless_texture_functions : require
26
// Pick the image format layout qualifier used for the destination images.  The *_UseHalf variants
// share the storage format of their full-precision counterparts.
#if (IsRGBA8) || (IsRGBA8_UseHalf)
#    define DST_FORMAT rgba8
#elif (IsRGBA16) || (IsRGBA16_UseHalf)
#    define DST_FORMAT rgba16
#elif (IsRGBA32F)
#    define DST_FORMAT rgba32f
#else
#    error "Not all formats are accounted for"
#endif
36
// Number of destination images bound to this shader variant; used as the size of the `dst` array.
#if (DestSize4)
#    define DST_COUNT 4
#elif (DestSize6)
#    define DST_COUNT 6
#else
#    error "Not all destination sizes are accounted for"
#endif
44
45// TODO: Support sRGB
46// TODO: Support non-float formats
47// TODO: Support subgroup mode
48
// Each workgroup is a flat line of 256 invocations.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Destination images, indexed by mip number in SpdStore/SpdStoreH.
layout(DST_FORMAT, set = 0, binding = 0) coherent uniform image2D dst[DST_COUNT];
// Source texture, read through texture() in SpdLoadSourceImage/SpdLoadSourceImageH.
layout(binding = 1, set = 0) uniform sampler2D src;
53
// Per-dispatch parameters.
layout(push_constant) uniform PushConstants
{
    vec2 invSrcExtent;  // Inverse extents of the src image, used to turn texel coordinates into uv.
    uint levelCount;    // Number of levels to generate mipmaps for.
} params;
60
// Configuration macros that must be set before ffx_a.h is included.
#define A_GLSL
#define A_GPU

// For 8- and 16-bit-per-channel images, switch to half instead of float if supported.  A_HALF also
// selects the half-precision callbacks and entry point further down in this file.
#if (IsRGBA8_UseHalf) || (IsRGBA16_UseHalf)
#    define A_HALF
#endif

#include "third_party/ffx_spd/ffx_a.h"
70
// Workgroup-shared scratch storage, accessed only through the SpdLoadIntermediate* and
// SpdStoreIntermediate* callbacks.  The element type follows the precision mode.
#if defined(A_HALF)
shared AH4 spd_intermediate[16][16];
#else
shared AF4 spd_intermediate[16][16];
#endif
// Not referenced anywhere in this file (the atomic counter callbacks are no-ops).
// NOTE(review): presumably still expected by ffx_spd.h -- confirm before removing.
shared AU1 spd_counter;

// Read mip 0 through a linear sampler rather than doing multiple loads and averaging manually.
#define SPD_LINEAR_SAMPLER
81
82// Utility functions used by ffx_spd.h
83
84#ifdef A_HALF
85
// Only the half-precision (*H) callbacks are provided in this branch.
// NOTE(review): presumably this makes ffx_spd.h skip its float-only path -- confirm in the header.
#define SPD_PACKED_ONLY
87
// Source image fetch (half-precision variant).  Converts the integer coordinate `p` into a uv as
// p * invSrcExtent + invSrcExtent, samples `src` there and narrows the result to AH4.
AH4 SpdLoadSourceImageH(ASU2 p)
{
    AF2 texelSize = params.invSrcExtent;
    AF2 uv        = p * texelSize + texelSize;
    AF4 color     = texture(src, uv);
    return AH4(color);
}
94
// SpdLoadH() receives a 32-bit signed integer 2D coordinate and is expected to return a color.
// In FFX SPD it reads back the 5th mip level, whose texels were produced by different thread
// groups, so that the last thread group can compute the subsequent mips from it.
//
// This shader never generates more than 6 levels, so the callback is unused and simply yields
// zero without touching any image.
AH4 SpdLoadH(ASU2 p)
{
    AH4 unusedResult = AH4(0);
    return unusedResult;
}
104
// Store callback (half-precision variant): widens `value` to AF4 and writes it to texel `p` of
// destination image number `mip`.
void SpdStoreH(ASU2 p, AH4 value, AU1 mip)
{
    AF4 texel = AF4(value);
    imageStore(dst[mip], p, texel);
}
110
// LDS read callback: returns the value held in spd_intermediate at [x][y].
AH4 SpdLoadIntermediateH(AU1 x, AU1 y)
{
    AH4 cached = spd_intermediate[x][y];
    return cached;
}

// LDS write callback: replaces the value held in spd_intermediate at [x][y].
void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value)
{
    spd_intermediate[x][y] = value;
}
120
// Reduction callback (half-precision variant): collapses the four values of a 2x2 footprint into
// one output by averaging them.  The values are accumulated in argument order.
AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3)
{
    AH4 sum = v0 + v1;
    sum += v2;
    sum += v3;
    return sum * AH1(0.25);
}
126
127#else  // A_HALF
128
// Source image fetch.  Converts the integer coordinate `p` into a uv as
// p * invSrcExtent + invSrcExtent and samples `src` there.
AF4 SpdLoadSourceImage(ASU2 p)
{
    AF2 texelSize = params.invSrcExtent;
    AF2 uv        = p * texelSize + texelSize;
    return texture(src, uv);
}
135
// SpdLoad() receives a 32-bit signed integer 2D coordinate and is expected to return a color.
// In FFX SPD it reads back the 5th mip level, whose texels were produced by different thread
// groups, so that the last thread group can compute the subsequent mips from it.
//
// This shader never generates more than 6 levels, so the callback is unused and simply yields
// zero without touching any image.
AF4 SpdLoad(ASU2 p)
{
    AF4 unusedResult = AF4(0);
    return unusedResult;
}
145
// Store callback: writes `value` to texel `p` of destination image number `mip`.
void SpdStore(ASU2 p, AF4 value, AU1 mip)
{
    AF4 texel = value;
    imageStore(dst[mip], p, texel);
}
151
// LDS read callback: returns the value held in spd_intermediate at [x][y].
AF4 SpdLoadIntermediate(AU1 x, AU1 y)
{
    AF4 cached = spd_intermediate[x][y];
    return cached;
}

// LDS write callback: replaces the value held in spd_intermediate at [x][y].
void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value)
{
    spd_intermediate[x][y] = value;
}
161
// Reduction callback: collapses the four values of a 2x2 footprint into one output by averaging
// them.  The values are accumulated in argument order.
AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
{
    AF4 sum = v0 + v1;
    sum += v2;
    sum += v3;
    return sum * 0.25;
}
167#endif  // A_HALF
168
// Atomic counter hooks required by ffx_spd.h.  Since no more than 6 mips are ever generated, the
// counter is never needed: reading it yields an arbitrary constant (SpdDownsample early-outs
// before looking at the value) and incrementing it expands to nothing.
#define SpdGetAtomicCounter() 0
#define SpdIncreaseAtomicCounter()
173
174#include "third_party/ffx_spd/ffx_spd.h"
175
// Entry point: runs the FFX SPD downsample for this workgroup, using the half-precision variant
// when A_HALF is defined.
void main()
{
    // SpdStore/SpdStoreH index dst[] with the mip number, and dst[] only has DST_COUNT entries.
    // Clamp the requested level count so that an oversized push constant can never make the
    // shader index past the end of the descriptor array.  Valid inputs are unaffected.
    // NOTE(review): assumes ffx_spd.h only stores to mip indices below the count passed here --
    // confirm against the header.
    uint mipCount = min(params.levelCount, uint(DST_COUNT));

    // The final argument stays 0, matching the no-op atomic counter hooks defined in this file.
#ifdef A_HALF
    SpdDownsampleH(gl_WorkGroupID.xy, gl_LocalInvocationIndex, mipCount, 0);
#else
    SpdDownsample(gl_WorkGroupID.xy, gl_LocalInvocationIndex, mipCount, 0);
#endif
}
184