1// 2// Copyright 2020 The ANGLE Project. All rights reserved. 3// Use of this source code is governed by a BSD-style license that can be 4// found in the LICENSE file. 5// 6 7#include "common.h" 8 9using namespace rx::mtl_shader; 10 11#define kThreadGroupXYZ \ 12 (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim * \ 13 kGenerateMipThreadGroupSizePerDim) 14 15#define kThreadGroupXY (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim) 16#define kThreadGroupX kGenerateMipThreadGroupSizePerDim 17 18#define TEXEL_STORE(index, texel) \ 19 sR[index] = texel.r; \ 20 sG[index] = texel.g; \ 21 sB[index] = texel.b; \ 22 sA[index] = texel.a; 23 24#define TEXEL_LOAD(index) float4(sR[index], sG[index], sB[index], sA[index]) 25 26#define TO_LINEAR(texel) (options.sRGB ? sRGBtoLinear(texel) : texel) 27 28#define OUT_OF_BOUND_CHECK(edgeValue, targetValue, condition) \ 29 (condition) ? (edgeValue) : (targetValue) 30 31struct GenMipParams 32{ 33 uint srcLevel; 34 uint numMipLevelsToGen; 35 bool sRGB; 36}; 37 38// NOTE(hqle): For numMipLevelsToGen > 1, this function assumes the texture is power of two. If it 39// is not, quality will not be good. 40kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 41 ushort3 gIndices [[thread_position_in_grid]], 42 texture3d<float> srcTexture [[texture(0)]], 43 texture3d<float, access::write> dstMip1 [[texture(1)]], 44 texture3d<float, access::write> dstMip2 [[texture(2)]], 45 texture3d<float, access::write> dstMip3 [[texture(3)]], 46 texture3d<float, access::write> dstMip4 [[texture(4)]], 47 constant GenMipParams &options [[buffer(0)]]) 48{ 49 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 50 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 51 52 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 53 54 // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use. 55 56 // Use struct of array style to avoid bank conflict. 57 threadgroup float sR[kThreadGroupXYZ]; 58 threadgroup float sG[kThreadGroupXYZ]; 59 threadgroup float sB[kThreadGroupXYZ]; 60 threadgroup float sA[kThreadGroupXYZ]; 61 62 // ----- First mip level ------- 63 float4 texel1; 64 if (validThread) 65 { 66 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 67 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 68 69 // Write to texture 70 dstMip1.write(texel1, gIndices); 71 } 72 else 73 { 74 // This will invalidate all subsequent checks 75 lIndex = 0xffffffff; 76 } 77 78 if (options.numMipLevelsToGen == 1) 79 { 80 return; 81 } 82 83 // ---- Second mip level -------- 84 85 // Write to shared memory 86 if (options.sRGB) 87 { 88 texel1 = linearToSRGB(texel1); 89 } 90 TEXEL_STORE(lIndex, texel1); 91 92 threadgroup_barrier(mem_flags::mem_threadgroup); 93 94 // Index must be even 95 if ((lIndex & 0x49) == 0) // (lIndex & b1001001) == 0 96 { 97 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 98 99 // (x+1, y, z) 100 // If the width of mip is 1, texel2 will equal to texel1: 101 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x); 102 // (x, y+1, z) 103 float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y); 104 // (x, y, z+1) 105 float4 texel4 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupXY), atEdge.z); 106 // (x+1, y+1, z) 107 float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)), 108 atEdge.x | atEdge.y); 109 // (x+1, y, z+1) 110 float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupXY + 1)), 111 atEdge.x | atEdge.z); 112 // (x, y+1, z+1) 113 float4 texel7 = OUT_OF_BOUND_CHECK( 114 texel3, TEXEL_LOAD(lIndex + (kThreadGroupXY + kThreadGroupX)), atEdge.y | atEdge.z); 115 // (x+1, y+1, z+1) 116 float4 texel8 = 117 OUT_OF_BOUND_CHECK(texel5, TEXEL_LOAD(lIndex + (kThreadGroupXY + kThreadGroupX + 1)), 118 atEdge.x | atEdge.y | atEdge.z); 119 120 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 121 122 dstMip2.write(TO_LINEAR(texel1), gIndices >> 1); 123 124 // Write to shared memory 125 TEXEL_STORE(lIndex, texel1); 126 } 127 128 if (options.numMipLevelsToGen == 2) 129 { 130 return; 131 } 132 133 // ---- 3rd mip level -------- 134 threadgroup_barrier(mem_flags::mem_threadgroup); 135 136 // Index must be multiple of 4 137 if ((lIndex & 0xdb) == 0) // (lIndex & b11011011) == 0 138 { 139 mipSize = max(mipSize >> 1, ushort3(1)); 140 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 141 142 // (x+1, y, z) 143 // If the width of mip is 1, texel2 will equal to texel1: 144 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x); 145 // (x, y+1, z) 146 float4 texel3 = 147 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (2 * kThreadGroupX)), atEdge.y); 148 // (x, y, z+1) 149 float4 texel4 = 150 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY)), atEdge.z); 151 // (x+1, y+1, z) 152 float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)), 153 atEdge.x | atEdge.y); 154 // (x+1, y, z+1) 155 float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2)), 156 atEdge.x | atEdge.z); 157 // (x, y+1, z+1) 158 float4 texel7 = OUT_OF_BOUND_CHECK( 159 texel3, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2 * kThreadGroupX)), 160 atEdge.y | atEdge.z); 161 // (x+1, y+1, z+1) 162 float4 texel8 = OUT_OF_BOUND_CHECK( 163 texel5, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2 * kThreadGroupX + 2)), 164 atEdge.x | atEdge.y | atEdge.z); 165 166 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 167 168 dstMip3.write(TO_LINEAR(texel1), gIndices >> 2); 169 170 // Write to shared memory 171 TEXEL_STORE(lIndex, texel1); 172 } 173 174 if (options.numMipLevelsToGen == 3) 175 { 176 return; 177 } 178 179 // ---- 4th mip level -------- 180 threadgroup_barrier(mem_flags::mem_threadgroup); 181 182 // Index must be multiple of 8 183 if ((lIndex & 0x1ff) == 0) // (lIndex & b111111111) == 0 184 { 185 mipSize = max(mipSize >> 1, ushort3(1)); 186 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 187 188 // (x+1, y, z) 189 // If the width of mip is 1, texel2 will equal to texel1: 190 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x); 191 // (x, y+1, z) 192 float4 texel3 = 193 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (4 * kThreadGroupX)), atEdge.y); 194 // (x, y, z+1) 195 float4 texel4 = 196 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY)), atEdge.z); 197 // (x+1, y+1, z) 198 float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)), 199 atEdge.x | atEdge.y); 200 // (x+1, y, z+1) 201 float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4)), 202 atEdge.x | atEdge.z); 203 // (x, y+1, z+1) 204 float4 texel7 = OUT_OF_BOUND_CHECK( 205 texel3, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4 * kThreadGroupX)), 206 atEdge.y | atEdge.z); 207 // (x+1, y+1, z+1) 208 float4 texel8 = OUT_OF_BOUND_CHECK( 209 texel5, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4 * kThreadGroupX + 4)), 210 atEdge.x | atEdge.y | atEdge.z); 211 212 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 213 214 dstMip4.write(TO_LINEAR(texel1), gIndices >> 3); 215 } 216} 217 218kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 219 ushort2 gIndices [[thread_position_in_grid]], 220 texture2d<float> srcTexture [[texture(0)]], 221 texture2d<float, access::write> dstMip1 [[texture(1)]], 222 texture2d<float, access::write> dstMip2 [[texture(2)]], 223 texture2d<float, access::write> dstMip3 [[texture(3)]], 224 texture2d<float, access::write> dstMip4 [[texture(4)]], 225 constant GenMipParams &options [[buffer(0)]]) 226{ 227 uint firstMipLevel = options.srcLevel + 1; 228 ushort2 mipSize = 229 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 230 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 231 232 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 233 234 // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use. 235 236 // Use struct of array style to avoid bank conflict. 237 threadgroup float sR[kThreadGroupXY]; 238 threadgroup float sG[kThreadGroupXY]; 239 threadgroup float sB[kThreadGroupXY]; 240 threadgroup float sA[kThreadGroupXY]; 241 242 // ----- First mip level ------- 243 float4 texel1; 244 if (validThread) 245 { 246 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 247 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 248 249 // Write to texture 250 dstMip1.write(TO_LINEAR(texel1), gIndices); 251 } 252 else 253 { 254 // This will invalidate all subsequent checks 255 lIndex = 0xffffffff; 256 } 257 258 if (options.numMipLevelsToGen == 1) 259 { 260 return; 261 } 262 263 // ---- Second mip level -------- 264 265 // Write to shared memory 266 TEXEL_STORE(lIndex, texel1); 267 268 threadgroup_barrier(mem_flags::mem_threadgroup); 269 270 // Index must be even 271 if ((lIndex & 0x09) == 0) // (lIndex & b001001) == 0 272 { 273 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 274 275 // (x+1, y) 276 // If the width of mip is 1, texel2 will equal to texel1: 277 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x); 278 // (x, y+1) 279 float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y); 280 // (x+1, y+1) 281 float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)), 282 atEdge.x | atEdge.y); 283 284 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 285 286 dstMip2.write(TO_LINEAR(texel1), gIndices >> 1); 287 288 // Write to shared memory 289 TEXEL_STORE(lIndex, texel1); 290 } 291 292 if (options.numMipLevelsToGen == 2) 293 { 294 return; 295 } 296 297 // ---- 3rd mip level -------- 298 threadgroup_barrier(mem_flags::mem_threadgroup); 299 300 // Index must be multiple of 4 301 if ((lIndex & 0x1b) == 0) // (lIndex & b011011) == 0 302 { 303 mipSize = max(mipSize >> 1, ushort2(1)); 304 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 305 306 // (x+1, y) 307 // If the width of mip is 1, texel2 will equal to texel1: 308 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x); 309 // (x, y+1) 310 float4 texel3 = 311 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2 * kThreadGroupX), atEdge.y); 312 // (x+1, y+1) 313 float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)), 314 atEdge.x | atEdge.y); 315 316 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 317 318 dstMip3.write(TO_LINEAR(texel1), gIndices >> 2); 319 320 // Write to shared memory 321 TEXEL_STORE(lIndex, texel1); 322 } 323 324 if (options.numMipLevelsToGen == 3) 325 { 326 return; 327 } 328 329 // ---- 4th mip level -------- 330 threadgroup_barrier(mem_flags::mem_threadgroup); 331 332 // Index must be multiple of 8 333 if ((lIndex & 0x3f) == 0) // (lIndex & b111111) == 0 334 { 335 mipSize = max(mipSize >> 1, ushort2(1)); 336 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 337 338 // (x+1, y) 339 // If the width of mip is 1, texel2 will equal to texel1: 340 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x); 341 // (x, y+1) 342 float4 texel3 = 343 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4 * kThreadGroupX), atEdge.y); 344 // (x+1, y+1) 345 float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)), 346 atEdge.x | atEdge.y); 347 348 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 349 350 dstMip4.write(TO_LINEAR(texel1), gIndices >> 3); 351 } 352} 353 354template <typename TextureTypeR, typename TextureTypeW> 355static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 356 uint lIndex, 357 ushort3 gIndices, 358 TextureTypeR srcTexture, 359 TextureTypeW dstMip2, 360 TextureTypeW dstMip3, 361 TextureTypeW dstMip4, 362 ushort2 mip1Size, 363 float4 mip1Texel, 364 threadgroup float *sR, 365 threadgroup float *sG, 366 threadgroup float *sB, 367 threadgroup float *sA, 368 constant GenMipParams &options) 369{ 370 ushort2 mipSize = mip1Size; 371 float4 texel1 = mip1Texel; 372 373 // ---- Second mip level -------- 374 375 // Write to shared memory 376 TEXEL_STORE(lIndex, texel1); 377 378 threadgroup_barrier(mem_flags::mem_threadgroup); 379 380 // Index must be even 381 if ((lIndex & 0x09) == 0) // (lIndex & b001001) == 0 382 { 383 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 384 385 // (x+1, y) 386 // If the width of mip is 1, texel2 will equal to texel1: 387 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x); 388 // (x, y+1) 389 float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y); 390 // (x+1, y+1) 391 float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)), 392 atEdge.x | atEdge.y); 393 394 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 395 396 dstMip2.write(TO_LINEAR(texel1), gIndices.xy >> 1, gIndices.z); 397 398 // Write to shared memory 399 TEXEL_STORE(lIndex, texel1); 400 } 401 402 if (options.numMipLevelsToGen == 2) 403 { 404 return; 405 } 406 407 // ---- 3rd mip level -------- 408 threadgroup_barrier(mem_flags::mem_threadgroup); 409 410 // Index must be multiple of 4 411 if ((lIndex & 0x1b) == 0) // (lIndex & b011011) == 0 412 { 413 mipSize = max(mipSize >> 1, ushort2(1)); 414 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 415 416 // (x+1, y) 417 // If the width of mip is 1, texel2 will equal to texel1: 418 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x); 419 // (x, y+1) 420 float4 texel3 = 421 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2 * kThreadGroupX), atEdge.y); 422 // (x+1, y+1) 423 float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)), 424 atEdge.x | atEdge.y); 425 426 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 427 428 dstMip3.write(TO_LINEAR(texel1), gIndices.xy >> 2, gIndices.z); 429 430 // Write to shared memory 431 TEXEL_STORE(lIndex, texel1); 432 } 433 434 if (options.numMipLevelsToGen == 3) 435 { 436 return; 437 } 438 439 // ---- 4th mip level -------- 440 threadgroup_barrier(mem_flags::mem_threadgroup); 441 442 // Index must be multiple of 8 443 if ((lIndex & 0x3f) == 0) // (lIndex & b111111) == 0 444 { 445 mipSize = max(mipSize >> 1, ushort2(1)); 446 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 447 448 // (x+1, y) 449 // If the width of mip is 1, texel2 will equal to texel1: 450 float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x); 451 // (x, y+1) 452 float4 texel3 = 453 OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4 * kThreadGroupX), atEdge.y); 454 // (x+1, y+1) 455 float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)), 456 atEdge.x | atEdge.y); 457 458 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 459 460 dstMip4.write(TO_LINEAR(texel1), gIndices.xy >> 3, gIndices.z); 461 } 462} 463 464kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 465 ushort3 gIndices [[thread_position_in_grid]], 466 texturecube<float> srcTexture [[texture(0)]], 467 texturecube<float, access::write> dstMip1 [[texture(1)]], 468 texturecube<float, access::write> dstMip2 [[texture(2)]], 469 texturecube<float, access::write> dstMip3 [[texture(3)]], 470 texturecube<float, access::write> dstMip4 [[texture(4)]], 471 constant GenMipParams &options [[buffer(0)]]) 472{ 473 uint firstMipLevel = options.srcLevel + 1; 474 ushort2 mip1Size = 475 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 476 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 477 478 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 479 480 // ----- First mip level ------- 481 float4 mip1Texel; 482 if (validThread) 483 { 484 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 485 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 486 level(options.srcLevel)); 487 488 // Write to texture 489 dstMip1.write(TO_LINEAR(mip1Texel), gIndices.xy, gIndices.z); 490 } 491 else 492 { 493 // This will invalidate all subsequent checks 494 lIndex = 0xffffffff; 495 } 496 497 if (options.numMipLevelsToGen == 1) 498 { 499 return; 500 } 501 502 // Use struct of array style to avoid bank conflict. 503 threadgroup float sR[kThreadGroupXY]; 504 threadgroup float sG[kThreadGroupXY]; 505 threadgroup float sB[kThreadGroupXY]; 506 threadgroup float sA[kThreadGroupXY]; 507 508 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 509 mip1Size, mip1Texel, sR, sG, sB, sA, options); 510} 511 512kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 513 ushort3 gIndices [[thread_position_in_grid]], 514 texture2d_array<float> srcTexture [[texture(0)]], 515 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 516 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 517 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 518 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 519 constant GenMipParams &options [[buffer(0)]]) 520{ 521 uint firstMipLevel = options.srcLevel + 1; 522 ushort2 mip1Size = 523 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 524 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 525 526 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 527 528 // ----- First mip level ------- 529 float4 mip1Texel; 530 if (validThread) 531 { 532 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 533 mip1Texel = 534 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 535 536 // Write to texture 537 dstMip1.write(TO_LINEAR(mip1Texel), gIndices.xy, gIndices.z); 538 } 539 else 540 { 541 // This will invalidate all subsequent checks 542 lIndex = 0xffffffff; 543 } 544 545 if (options.numMipLevelsToGen == 1) 546 { 547 return; 548 } 549 550 // Use struct of array style to avoid bank conflict. 551 threadgroup float sR[kThreadGroupXY]; 552 threadgroup float sG[kThreadGroupXY]; 553 threadgroup float sB[kThreadGroupXY]; 554 threadgroup float sA[kThreadGroupXY]; 555 556 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 557 mip1Size, mip1Texel, sR, sG, sB, sA, options); 558} 559