//
// Copyright 2020 The ANGLE Project. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
6
7#include "common.h"
8
9using namespace rx::mtl_shader;
10
// Element count of the per-channel shared arrays used by the 3D kernel (one slot per thread of a
// cubic thread group).
#define kThreadGroupXYZ                                                      \
    (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim * \
     kGenerateMipThreadGroupSizePerDim)

// Element count of the per-channel shared arrays used by the 2D/cube/array kernels; also the
// shared-memory stride between two consecutive Z slices in the 3D kernel.
#define kThreadGroupXY (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim)
// Shared-memory stride between two consecutive rows.
#define kThreadGroupX (kGenerateMipThreadGroupSizePerDim)

// Stores the four channels of `texel` into slot `index` of the sR/sG/sB/sA arrays that must be in
// scope at the expansion site. Wrapped in do/while(0) so the macro behaves as ONE statement (safe
// inside an unbraced if/else); callers supply the trailing semicolon.
#define TEXEL_STORE(index, texel) \
    do                            \
    {                             \
        sR[index] = (texel).r;    \
        sG[index] = (texel).g;    \
        sB[index] = (texel).b;    \
        sA[index] = (texel).a;    \
    } while (0)

// Rebuilds a float4 from slot `index` of the sR/sG/sB/sA arrays in scope.
#define TEXEL_LOAD(index) float4(sR[index], sG[index], sB[index], sA[index])

// Applies sRGBtoLinear() to `texel` when `options.sRGB` is set, otherwise yields `texel` as is.
// Requires `options` to be in scope at the expansion site.
#define TO_LINEAR(texel) (options.sRGB ? sRGBtoLinear(texel) : (texel))

// Yields `edgeValue` when `condition` holds, otherwise `targetValue`. Only the selected operand
// is evaluated, so `targetValue` may be a TEXEL_LOAD that would be invalid at the edge. The whole
// expansion is parenthesized so it can be embedded in a larger expression.
#define OUT_OF_BOUND_CHECK(edgeValue, targetValue, condition) \
    ((condition) ? (edgeValue) : (targetValue))
30
// Parameters read by every mipmap generation kernel in this file through [[buffer(0)]].
// NOTE(review): presumably mirrored by a host-side struct - keep field order and types in sync.
struct GenMipParams
{
    // Mip level of the source texture that is sampled to produce the first generated level.
    uint srcLevel;
    // How many levels to generate in this dispatch. The kernels stop early for 1, 2 and 3;
    // any other value makes them write all four destination levels.
    uint numMipLevelsToGen;
    // When true, the kernels route texels through the sRGB <-> linear helpers.
    bool sRGB;
};
37
38// NOTE(hqle): For numMipLevelsToGen > 1, this function assumes the texture is power of two. If it
39// is not, quality will not be good.
40kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
41                              ushort3 gIndices [[thread_position_in_grid]],
42                              texture3d<float> srcTexture [[texture(0)]],
43                              texture3d<float, access::write> dstMip1 [[texture(1)]],
44                              texture3d<float, access::write> dstMip2 [[texture(2)]],
45                              texture3d<float, access::write> dstMip3 [[texture(3)]],
46                              texture3d<float, access::write> dstMip4 [[texture(4)]],
47                              constant GenMipParams &options [[buffer(0)]])
48{
49    ushort3 mipSize    = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
50    bool validThread   = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
51
52    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
53
54    // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use.
55
56    // Use struct of array style to avoid bank conflict.
57    threadgroup float sR[kThreadGroupXYZ];
58    threadgroup float sG[kThreadGroupXYZ];
59    threadgroup float sB[kThreadGroupXYZ];
60    threadgroup float sA[kThreadGroupXYZ];
61
62    // ----- First mip level -------
63    float4 texel1;
64    if (validThread)
65    {
66        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
67        texel1           = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
68
69        // Write to texture
70        dstMip1.write(texel1, gIndices);
71    }
72    else
73    {
74        // This will invalidate all subsequent checks
75        lIndex = 0xffffffff;
76    }
77
78    if (options.numMipLevelsToGen == 1)
79    {
80        return;
81    }
82
83    // ---- Second mip level --------
84
85    // Write to shared memory
86    if (options.sRGB)
87    {
88        texel1 = linearToSRGB(texel1);
89    }
90    TEXEL_STORE(lIndex, texel1);
91
92    threadgroup_barrier(mem_flags::mem_threadgroup);
93
94    // Index must be even
95    if ((lIndex & 0x49) == 0)  // (lIndex & b1001001) == 0
96    {
97        bool3 atEdge = gIndices == (mipSize - ushort3(1));
98
99        // (x+1, y, z)
100        // If the width of mip is 1, texel2 will equal to texel1:
101        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x);
102        // (x, y+1, z)
103        float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y);
104        // (x, y, z+1)
105        float4 texel4 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupXY), atEdge.z);
106        // (x+1, y+1, z)
107        float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)),
108                                           atEdge.x | atEdge.y);
109        // (x+1, y, z+1)
110        float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupXY + 1)),
111                                           atEdge.x | atEdge.z);
112        // (x, y+1, z+1)
113        float4 texel7 = OUT_OF_BOUND_CHECK(
114            texel3, TEXEL_LOAD(lIndex + (kThreadGroupXY + kThreadGroupX)), atEdge.y | atEdge.z);
115        // (x+1, y+1, z+1)
116        float4 texel8 =
117            OUT_OF_BOUND_CHECK(texel5, TEXEL_LOAD(lIndex + (kThreadGroupXY + kThreadGroupX + 1)),
118                               atEdge.x | atEdge.y | atEdge.z);
119
120        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
121
122        dstMip2.write(TO_LINEAR(texel1), gIndices >> 1);
123
124        // Write to shared memory
125        TEXEL_STORE(lIndex, texel1);
126    }
127
128    if (options.numMipLevelsToGen == 2)
129    {
130        return;
131    }
132
133    // ---- 3rd mip level --------
134    threadgroup_barrier(mem_flags::mem_threadgroup);
135
136    // Index must be multiple of 4
137    if ((lIndex & 0xdb) == 0)  // (lIndex & b11011011) == 0
138    {
139        mipSize      = max(mipSize >> 1, ushort3(1));
140        bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
141
142        // (x+1, y, z)
143        // If the width of mip is 1, texel2 will equal to texel1:
144        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x);
145        // (x, y+1, z)
146        float4 texel3 =
147            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (2 * kThreadGroupX)), atEdge.y);
148        // (x, y, z+1)
149        float4 texel4 =
150            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY)), atEdge.z);
151        // (x+1, y+1, z)
152        float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)),
153                                           atEdge.x | atEdge.y);
154        // (x+1, y, z+1)
155        float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2)),
156                                           atEdge.x | atEdge.z);
157        // (x, y+1, z+1)
158        float4 texel7 = OUT_OF_BOUND_CHECK(
159            texel3, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2 * kThreadGroupX)),
160            atEdge.y | atEdge.z);
161        // (x+1, y+1, z+1)
162        float4 texel8 = OUT_OF_BOUND_CHECK(
163            texel5, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2 * kThreadGroupX + 2)),
164            atEdge.x | atEdge.y | atEdge.z);
165
166        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
167
168        dstMip3.write(TO_LINEAR(texel1), gIndices >> 2);
169
170        // Write to shared memory
171        TEXEL_STORE(lIndex, texel1);
172    }
173
174    if (options.numMipLevelsToGen == 3)
175    {
176        return;
177    }
178
179    // ---- 4th mip level --------
180    threadgroup_barrier(mem_flags::mem_threadgroup);
181
182    // Index must be multiple of 8
183    if ((lIndex & 0x1ff) == 0)  // (lIndex & b111111111) == 0
184    {
185        mipSize      = max(mipSize >> 1, ushort3(1));
186        bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
187
188        // (x+1, y, z)
189        // If the width of mip is 1, texel2 will equal to texel1:
190        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x);
191        // (x, y+1, z)
192        float4 texel3 =
193            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (4 * kThreadGroupX)), atEdge.y);
194        // (x, y, z+1)
195        float4 texel4 =
196            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY)), atEdge.z);
197        // (x+1, y+1, z)
198        float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)),
199                                           atEdge.x | atEdge.y);
200        // (x+1, y, z+1)
201        float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4)),
202                                           atEdge.x | atEdge.z);
203        // (x, y+1, z+1)
204        float4 texel7 = OUT_OF_BOUND_CHECK(
205            texel3, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4 * kThreadGroupX)),
206            atEdge.y | atEdge.z);
207        // (x+1, y+1, z+1)
208        float4 texel8 = OUT_OF_BOUND_CHECK(
209            texel5, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4 * kThreadGroupX + 4)),
210            atEdge.x | atEdge.y | atEdge.z);
211
212        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
213
214        dstMip4.write(TO_LINEAR(texel1), gIndices >> 3);
215    }
216}
217
218kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
219                              ushort2 gIndices [[thread_position_in_grid]],
220                              texture2d<float> srcTexture [[texture(0)]],
221                              texture2d<float, access::write> dstMip1 [[texture(1)]],
222                              texture2d<float, access::write> dstMip2 [[texture(2)]],
223                              texture2d<float, access::write> dstMip3 [[texture(3)]],
224                              texture2d<float, access::write> dstMip4 [[texture(4)]],
225                              constant GenMipParams &options [[buffer(0)]])
226{
227    uint firstMipLevel = options.srcLevel + 1;
228    ushort2 mipSize =
229        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
230    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
231
232    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
233
234    // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use.
235
236    // Use struct of array style to avoid bank conflict.
237    threadgroup float sR[kThreadGroupXY];
238    threadgroup float sG[kThreadGroupXY];
239    threadgroup float sB[kThreadGroupXY];
240    threadgroup float sA[kThreadGroupXY];
241
242    // ----- First mip level -------
243    float4 texel1;
244    if (validThread)
245    {
246        float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
247        texel1           = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
248
249        // Write to texture
250        dstMip1.write(TO_LINEAR(texel1), gIndices);
251    }
252    else
253    {
254        // This will invalidate all subsequent checks
255        lIndex = 0xffffffff;
256    }
257
258    if (options.numMipLevelsToGen == 1)
259    {
260        return;
261    }
262
263    // ---- Second mip level --------
264
265    // Write to shared memory
266    TEXEL_STORE(lIndex, texel1);
267
268    threadgroup_barrier(mem_flags::mem_threadgroup);
269
270    // Index must be even
271    if ((lIndex & 0x09) == 0)  // (lIndex & b001001) == 0
272    {
273        bool2 atEdge = gIndices == (mipSize - ushort2(1));
274
275        // (x+1, y)
276        // If the width of mip is 1, texel2 will equal to texel1:
277        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x);
278        // (x, y+1)
279        float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y);
280        // (x+1, y+1)
281        float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)),
282                                           atEdge.x | atEdge.y);
283
284        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
285
286        dstMip2.write(TO_LINEAR(texel1), gIndices >> 1);
287
288        // Write to shared memory
289        TEXEL_STORE(lIndex, texel1);
290    }
291
292    if (options.numMipLevelsToGen == 2)
293    {
294        return;
295    }
296
297    // ---- 3rd mip level --------
298    threadgroup_barrier(mem_flags::mem_threadgroup);
299
300    // Index must be multiple of 4
301    if ((lIndex & 0x1b) == 0)  // (lIndex & b011011) == 0
302    {
303        mipSize      = max(mipSize >> 1, ushort2(1));
304        bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
305
306        // (x+1, y)
307        // If the width of mip is 1, texel2 will equal to texel1:
308        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x);
309        // (x, y+1)
310        float4 texel3 =
311            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2 * kThreadGroupX), atEdge.y);
312        // (x+1, y+1)
313        float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)),
314                                           atEdge.x | atEdge.y);
315
316        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
317
318        dstMip3.write(TO_LINEAR(texel1), gIndices >> 2);
319
320        // Write to shared memory
321        TEXEL_STORE(lIndex, texel1);
322    }
323
324    if (options.numMipLevelsToGen == 3)
325    {
326        return;
327    }
328
329    // ---- 4th mip level --------
330    threadgroup_barrier(mem_flags::mem_threadgroup);
331
332    // Index must be multiple of 8
333    if ((lIndex & 0x3f) == 0)  // (lIndex & b111111) == 0
334    {
335        mipSize      = max(mipSize >> 1, ushort2(1));
336        bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
337
338        // (x+1, y)
339        // If the width of mip is 1, texel2 will equal to texel1:
340        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x);
341        // (x, y+1)
342        float4 texel3 =
343            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4 * kThreadGroupX), atEdge.y);
344        // (x+1, y+1)
345        float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)),
346                                           atEdge.x | atEdge.y);
347
348        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
349
350        dstMip4.write(TO_LINEAR(texel1), gIndices >> 3);
351    }
352}
353
354template <typename TextureTypeR, typename TextureTypeW>
355static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
356    uint lIndex,
357    ushort3 gIndices,
358    TextureTypeR srcTexture,
359    TextureTypeW dstMip2,
360    TextureTypeW dstMip3,
361    TextureTypeW dstMip4,
362    ushort2 mip1Size,
363    float4 mip1Texel,
364    threadgroup float *sR,
365    threadgroup float *sG,
366    threadgroup float *sB,
367    threadgroup float *sA,
368    constant GenMipParams &options)
369{
370    ushort2 mipSize = mip1Size;
371    float4 texel1   = mip1Texel;
372
373    // ---- Second mip level --------
374
375    // Write to shared memory
376    TEXEL_STORE(lIndex, texel1);
377
378    threadgroup_barrier(mem_flags::mem_threadgroup);
379
380    // Index must be even
381    if ((lIndex & 0x09) == 0)  // (lIndex & b001001) == 0
382    {
383        bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
384
385        // (x+1, y)
386        // If the width of mip is 1, texel2 will equal to texel1:
387        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x);
388        // (x, y+1)
389        float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y);
390        // (x+1, y+1)
391        float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)),
392                                           atEdge.x | atEdge.y);
393
394        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
395
396        dstMip2.write(TO_LINEAR(texel1), gIndices.xy >> 1, gIndices.z);
397
398        // Write to shared memory
399        TEXEL_STORE(lIndex, texel1);
400    }
401
402    if (options.numMipLevelsToGen == 2)
403    {
404        return;
405    }
406
407    // ---- 3rd mip level --------
408    threadgroup_barrier(mem_flags::mem_threadgroup);
409
410    // Index must be multiple of 4
411    if ((lIndex & 0x1b) == 0)  // (lIndex & b011011) == 0
412    {
413        mipSize      = max(mipSize >> 1, ushort2(1));
414        bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
415
416        // (x+1, y)
417        // If the width of mip is 1, texel2 will equal to texel1:
418        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x);
419        // (x, y+1)
420        float4 texel3 =
421            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2 * kThreadGroupX), atEdge.y);
422        // (x+1, y+1)
423        float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)),
424                                           atEdge.x | atEdge.y);
425
426        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
427
428        dstMip3.write(TO_LINEAR(texel1), gIndices.xy >> 2, gIndices.z);
429
430        // Write to shared memory
431        TEXEL_STORE(lIndex, texel1);
432    }
433
434    if (options.numMipLevelsToGen == 3)
435    {
436        return;
437    }
438
439    // ---- 4th mip level --------
440    threadgroup_barrier(mem_flags::mem_threadgroup);
441
442    // Index must be multiple of 8
443    if ((lIndex & 0x3f) == 0)  // (lIndex & b111111) == 0
444    {
445        mipSize      = max(mipSize >> 1, ushort2(1));
446        bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
447
448        // (x+1, y)
449        // If the width of mip is 1, texel2 will equal to texel1:
450        float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x);
451        // (x, y+1)
452        float4 texel3 =
453            OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4 * kThreadGroupX), atEdge.y);
454        // (x+1, y+1)
455        float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)),
456                                           atEdge.x | atEdge.y);
457
458        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
459
460        dstMip4.write(TO_LINEAR(texel1), gIndices.xy >> 3, gIndices.z);
461    }
462}
463
464kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
465                                ushort3 gIndices [[thread_position_in_grid]],
466                                texturecube<float> srcTexture [[texture(0)]],
467                                texturecube<float, access::write> dstMip1 [[texture(1)]],
468                                texturecube<float, access::write> dstMip2 [[texture(2)]],
469                                texturecube<float, access::write> dstMip3 [[texture(3)]],
470                                texturecube<float, access::write> dstMip4 [[texture(4)]],
471                                constant GenMipParams &options [[buffer(0)]])
472{
473    uint firstMipLevel = options.srcLevel + 1;
474    ushort2 mip1Size =
475        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
476    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
477
478    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
479
480    // ----- First mip level -------
481    float4 mip1Texel;
482    if (validThread)
483    {
484        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
485        mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
486                                      level(options.srcLevel));
487
488        // Write to texture
489        dstMip1.write(TO_LINEAR(mip1Texel), gIndices.xy, gIndices.z);
490    }
491    else
492    {
493        // This will invalidate all subsequent checks
494        lIndex = 0xffffffff;
495    }
496
497    if (options.numMipLevelsToGen == 1)
498    {
499        return;
500    }
501
502    // Use struct of array style to avoid bank conflict.
503    threadgroup float sR[kThreadGroupXY];
504    threadgroup float sG[kThreadGroupXY];
505    threadgroup float sB[kThreadGroupXY];
506    threadgroup float sA[kThreadGroupXY];
507
508    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
509                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
510}
511
512kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
513                                   ushort3 gIndices [[thread_position_in_grid]],
514                                   texture2d_array<float> srcTexture [[texture(0)]],
515                                   texture2d_array<float, access::write> dstMip1 [[texture(1)]],
516                                   texture2d_array<float, access::write> dstMip2 [[texture(2)]],
517                                   texture2d_array<float, access::write> dstMip3 [[texture(3)]],
518                                   texture2d_array<float, access::write> dstMip4 [[texture(4)]],
519                                   constant GenMipParams &options [[buffer(0)]])
520{
521    uint firstMipLevel = options.srcLevel + 1;
522    ushort2 mip1Size =
523        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
524    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
525
526    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
527
528    // ----- First mip level -------
529    float4 mip1Texel;
530    if (validThread)
531    {
532        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
533        mip1Texel =
534            srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
535
536        // Write to texture
537        dstMip1.write(TO_LINEAR(mip1Texel), gIndices.xy, gIndices.z);
538    }
539    else
540    {
541        // This will invalidate all subsequent checks
542        lIndex = 0xffffffff;
543    }
544
545    if (options.numMipLevelsToGen == 1)
546    {
547        return;
548    }
549
550    // Use struct of array style to avoid bank conflict.
551    threadgroup float sR[kThreadGroupXY];
552    threadgroup float sG[kThreadGroupXY];
553    threadgroup float sB[kThreadGroupXY];
554    threadgroup float sA[kThreadGroupXY];
555
556    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
557                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
558}
559