1// GENERATED FILE - DO NOT EDIT. 2// Generated by gen_mtl_internal_shaders.py 3// 4// Copyright 2020 The ANGLE Project Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7// 8 9// C++ string version of combined Metal default shaders. 10 11 12 13static char gDefaultMetallibSrc[] = R"( 14# 1 "temp_master_source.metal" 15# 1 "<built-in>" 1 16# 1 "<built-in>" 3 17# 433 "<built-in>" 3 18# 1 "<command line>" 1 19# 1 "<built-in>" 2 20# 1 "temp_master_source.metal" 2 21# 1 "./blit.metal" 1 22 23 24 25 26 27 28 29# 1 "./common.h" 1 30# 13 "./common.h" 31# include <simd/simd.h> 32# include <metal_stdlib> 33 34 35# 1 "./constants.h" 1 36# 11 "./constants.h" 37namespace rx 38{ 39namespace mtl_shader 40{ 41 42enum 43{ 44 kTextureType2D = 0, 45 kTextureType2DMultisample = 1, 46 kTextureType2DArray = 2, 47 kTextureTypeCube = 3, 48 kTextureType3D = 4, 49 kTextureTypeCount = 5, 50}; 51 52 53 54 55} 56} 57# 18 "./common.h" 2 58 59 60 61 62 63 64 65using namespace metal; 66 67 68 69constant uint32_t kNumColorOutputs [[function_constant(0)]]; 70constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 71constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 72constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 73constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 74 75namespace rx 76{ 77namespace mtl_shader 78{ 79 80 81constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 82 83template <typename T> 84struct MultipleColorOutputs 85{ 86 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 87 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 88 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 89 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 90}; 91# 61 "./common.h" 92template <typename T> 93static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 94{ 95 MultipleColorOutputs<T> re; 96 97 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 98 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 99 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 100 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 101 102 return re; 103} 104 105static inline float3 cubeTexcoords(float2 texcoords, int face) 106{ 107 texcoords = 2.0 * texcoords - 1.0; 108 switch (face) 109 { 110 case 0: 111 return float3(1.0, -texcoords.y, -texcoords.x); 112 case 1: 113 return float3(-1.0, -texcoords.y, texcoords.x); 114 case 2: 115 return float3(texcoords.x, 1.0, texcoords.y); 116 case 3: 117 return float3(texcoords.x, -1.0, -texcoords.y); 118 case 4: 119 return float3(texcoords.x, -texcoords.y, 1.0); 120 case 5: 121 return float3(-texcoords.x, -texcoords.y, -1.0); 122 } 123 return float3(texcoords, 0); 124} 125 126template <typename T> 127static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 128{ 129 uint samples = srcTexture.get_num_samples(); 130 131 vec<T, 4> output(0); 132 133 for (uint sample = 0; sample < samples; ++sample) 134 { 135 output += srcTexture.read(coords, sample); 136 } 137 138 output = output / samples; 139 140 return output; 141} 142 143static inline float4 sRGBtoLinear(float4 color) 144{ 145 float3 linear1 = color.rgb / 12.92; 146 float3 linear2 = pow((color.rgb + float3(0.055)) / 1.055, 2.4); 147 float3 factor = float3(color.rgb <= float3(0.04045)); 148 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 149 150 return linear; 151} 152 153static inline float linearToSRGB(float color) 154{ 155 if (color <= 0.0f) 156 return 0.0f; 157 else if (color < 0.0031308f) 158 return 12.92f * color; 159 else if (color < 1.0f) 160 return 1.055f * pow(color, 0.41666f) - 0.055f; 161 else 162 return 1.0f; 163} 164 165static inline float4 linearToSRGB(float4 color) 166{ 167 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 168} 169 170template <typename Short> 171static inline Short bytesToShort(constant uchar *input, uint offset) 172{ 173 Short inputLo = input[offset]; 174 Short inputHi = input[offset + 1]; 175 176 return inputLo | (inputHi << 8); 177} 178 179template <typename Int> 180static inline Int bytesToInt(constant uchar *input, uint offset) 181{ 182 Int input0 = input[offset]; 183 Int input1 = input[offset + 1]; 184 Int input2 = input[offset + 2]; 185 Int input3 = input[offset + 3]; 186 187 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 188} 189 190template <typename Short> 191static inline void shortToBytes(Short val, uint offset, device uchar *output) 192{ 193 ushort valUnsigned = as_type<ushort>(val); 194 output[offset] = valUnsigned & 0xff; 195 output[offset + 1] = (valUnsigned >> 8) & 0xff; 196} 197 198template <typename Int> 199static inline void intToBytes(Int val, uint offset, device uchar *output) 200{ 201 uint valUnsigned = as_type<uint>(val); 202 output[offset] = valUnsigned & 0xff; 203 output[offset + 1] = (valUnsigned >> 8) & 0xff; 204 output[offset + 2] = (valUnsigned >> 16) & 0xff; 205 output[offset + 3] = (valUnsigned >> 24) & 0xff; 206} 207 208static inline void floatToBytes(float val, uint offset, device uchar *output) 209{ 210 intToBytes(as_type<uint>(val), offset, output); 211} 212 213static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 214{ 215 output[offset] = val & 0xff; 216 output[offset + 1] = (val >> 8) & 0xff; 217 output[offset + 2] = (val >> 16) & 0xff; 218} 219 220template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 221static inline T getShiftedData(T input) 222{ 223 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 224 "T must have at least as many bits as inputBitCount + inputBitStart."); 225 const T mask = (1 << inputBitCount) - 1; 226 return (input >> inputBitStart) & mask; 227} 228 229template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 230static inline T shiftData(T input) 231{ 232 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 233 "T must have at least as many bits as inputBitCount + inputBitStart."); 234 const T mask = (1 << inputBitCount) - 1; 235 return (input & mask) << inputBitStart; 236} 237 238template <unsigned int inputBitCount, typename T> 239static inline float normalizedToFloat(T input) 240{ 241 static_assert(inputBitCount <= (sizeof(T) * 8), 242 "T must have more bits than or same bits as inputBitCount."); 243 static_assert(inputBitCount <= 23, "Only single precision is supported"); 244 245 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 246 return input * inverseMax; 247} 248 249template <typename T> 250static inline float normalizedToFloat(T input) 251{ 252 return normalizedToFloat<sizeof(T) * 8, T>(input); 253} 254 255template <> 256inline float normalizedToFloat(short input) 257{ 258 constexpr float inverseMax = 1.0f / 0x7fff; 259 return static_cast<float>(input) * inverseMax; 260} 261 262template <> 263inline float normalizedToFloat(int input) 264{ 265 constexpr float inverseMax = 1.0f / 0x7fffffff; 266 return static_cast<float>(input) * inverseMax; 267} 268 269template <> 270inline float normalizedToFloat(uint input) 271{ 272 constexpr float inverseMax = 1.0f / 0xffffffff; 273 return static_cast<float>(input) * inverseMax; 274} 275 276template <unsigned int outputBitCount, typename T> 277static inline T floatToNormalized(float input) 278{ 279 static_assert(outputBitCount <= (sizeof(T) * 8), 280 "T must have more bits than or same bits as inputBitCount."); 281 static_assert(outputBitCount <= 23, "Only single precision is supported"); 282 283 return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f); 284} 285 286template <typename T> 287static inline T floatToNormalized(float input) 288{ 289 return floatToNormalized<sizeof(T) * 8, T>(input); 290} 291 292} 293} 294# 9 "./blit.metal" 2 295 296using namespace rx::mtl_shader; 297 298 299constant bool kPremultiplyAlpha [[function_constant(1)]]; 300constant bool kUnmultiplyAlpha [[function_constant(2)]]; 301constant int kSourceTextureType [[function_constant(3)]]; 302constant int kSourceTexture2Type [[function_constant(4)]]; 303 304constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 305constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 306constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 307constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 308constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 309 310constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 311constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 312constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 313constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 314 315struct BlitParams 316{ 317 318 float2 srcTexCoords[3]; 319 int srcLevel; 320 int srcLayer; 321 bool dstFlipViewportX; 322 bool dstFlipViewportY; 323 bool dstLuminance; 324 uint8_t padding[13]; 325}; 326 327struct BlitVSOut 328{ 329 float4 position [[position]]; 330 float2 texCoords [[user(locn1)]]; 331}; 332 333vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 334{ 335 BlitVSOut output; 336 output.position = float4(gCorners[vid], 0.0, 1.0); 337 output.texCoords = options.srcTexCoords[vid]; 338 339 if (options.dstFlipViewportX) 340 { 341 output.position.x = -output.position.x; 342 } 343 if (!options.dstFlipViewportY) 344 { 345 346 347 output.position.y = -output.position.y; 348 } 349 350 return output; 351} 352 353template <typename SrcTexture2d> 354static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 355{ 356 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 357 uint2 coords = uint2(texCoords * float2(dimens)); 358 359 return coords; 360} 361 362template <typename T> 363static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 364{ 365 uint2 coords = getImageCoords(srcTexture, texCoords); 366 return resolveTextureMS(srcTexture, coords); 367} 368 369template <typename T> 370static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 371 sampler textureSampler, 372 float2 texCoords, 373 constant BlitParams &options) 374{ 375 uint depth = srcTexture.get_depth(options.srcLevel); 376 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 377 378 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 379} 380# 112 "./blit.metal" 381template <typename T> 382static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 383{ 384 vec<T, 4> output; 385 386 switch (kSourceTextureType) 387 { 388 case kTextureType2D: 389 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 390 break; 391 case kTextureType2DArray: 392 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 393 level(options.srcLevel)); 394 break; 395 case kTextureType2DMultisample: 396 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 397 break; 398 case kTextureTypeCube: 399 output = srcTextureCube.sample(textureSampler, 400 cubeTexcoords(input.texCoords, options.srcLayer), 401 level(options.srcLevel)); 402 break; 403 case kTextureType3D: 404 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 405 break; 406 } 407 408 if (kPremultiplyAlpha) 409 { 410 output.xyz *= output.a; 411 } 412 else if (kUnmultiplyAlpha) 413 { 414 if (output.a != 0.0) 415 { 416 output.xyz /= output.a; 417 } 418 } 419 420 if (options.dstLuminance) 421 { 422 output.g = output.b = output.r; 423 } 424 425 return output; 426} 427 428template <typename T> 429static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 430{ 431 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 432 433 return toMultipleColorOutputs(output); 434} 435 436fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 437{ 438 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 439} 440fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 441{ 442 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 443} 444fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 445{ 446 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 447} 448 449fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 450{ 451 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 452 uint4 output = uint4(inputColor * float4(255.0)); 453 454 return toMultipleColorOutputs(output); 455} 456 457 458struct FragmentDepthOut 459{ 460 float depth [[depth(any)]]; 461}; 462 463static inline float sampleDepth( 464 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 465 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 466 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 467 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 468 float2 texCoords, 469 constant BlitParams &options) 470{ 471 float4 output; 472 473 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 474 475 switch (kSourceTextureType) 476 { 477 case kTextureType2D: 478 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 479 break; 480 case kTextureType2DArray: 481 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 482 level(options.srcLevel)); 483 break; 484 case kTextureType2DMultisample: 485 486 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 487 break; 488 case kTextureTypeCube: 489 output = 490 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 491 level(options.srcLevel)); 492 break; 493 } 494 495 return output.r; 496} 497 498fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 499 texture2d<float> srcTexture2d 500 [[texture(0), function_constant(kSourceTextureType2D)]], 501 texture2d_array<float> srcTexture2dArray 502 [[texture(0), function_constant(kSourceTextureType2DArray)]], 503 texture2d_ms<float> srcTexture2dMS 504 [[texture(0), function_constant(kSourceTextureType2DMS)]], 505 texturecube<float> srcTextureCube 506 [[texture(0), function_constant(kSourceTextureTypeCube)]], 507 constant BlitParams &options [[buffer(0)]]) 508{ 509 FragmentDepthOut re; 510 511 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 512 input.texCoords, options); 513 514 return re; 515} 516 517static inline uint32_t sampleStencil( 518 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 519 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 520 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 521 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 522 float2 texCoords, 523 int srcLevel, 524 int srcLayer) 525{ 526 uint4 output; 527 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 528 529 switch (kSourceTexture2Type) 530 { 531 case kTextureType2D: 532 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 533 break; 534 case kTextureType2DArray: 535 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 536 break; 537 case kTextureType2DMultisample: 538 539 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 540 break; 541 case kTextureTypeCube: 542 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 543 level(srcLevel)); 544 break; 545 } 546 547 return output.r; 548} 549 550 551struct BlitStencilToBufferParams 552{ 553 float2 srcStartTexCoords; 554 float2 srcTexCoordSteps; 555 int srcLevel; 556 int srcLayer; 557 558 uint2 dstSize; 559 uint dstBufferRowPitch; 560 561 bool resolveMS; 562}; 563 564kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 565 texture2d<uint32_t> srcTexture2d 566 [[texture(1), function_constant(kSourceTexture2Type2D)]], 567 texture2d_array<uint32_t> srcTexture2dArray 568 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 569 texture2d_ms<uint32_t> srcTexture2dMS 570 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 571 texturecube<uint32_t> srcTextureCube 572 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 573 constant BlitStencilToBufferParams &options [[buffer(0)]], 574 device uchar *buffer [[buffer(1)]]) 575{ 576 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 577 { 578 return; 579 } 580 581 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 582 583 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 584 { 585 uint samples = srcTexture2dMS.get_num_samples(); 586 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 587 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 588 589 for (uint sample = 0; sample < samples; ++sample) 590 { 591 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 592 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 593 } 594 } 595 else 596 { 597 uint32_t stencil = 598 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 599 srcTexCoords, options.srcLevel, options.srcLayer); 600 601 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 602 } 603} 604 605 606#if __METAL_VERSION__ >= 210 607 608struct FragmentStencilOut 609{ 610 uint32_t stencil [[stencil]]; 611}; 612 613struct FragmentDepthStencilOut 614{ 615 float depth [[depth(any)]]; 616 uint32_t stencil [[stencil]]; 617}; 618 619fragment FragmentStencilOut blitStencilFS( 620 BlitVSOut input [[stage_in]], 621 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 622 texture2d_array<uint32_t> srcTexture2dArray 623 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 624 texture2d_ms<uint32_t> srcTexture2dMS 625 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 626 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 627 constant BlitParams &options [[buffer(0)]]) 628{ 629 FragmentStencilOut re; 630 631 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 632 input.texCoords, options.srcLevel, options.srcLayer); 633 634 return re; 635} 636 637fragment FragmentDepthStencilOut blitDepthStencilFS( 638 BlitVSOut input [[stage_in]], 639 640 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 641 texture2d_array<float> srcDepthTexture2dArray 642 [[texture(0), function_constant(kSourceTextureType2DArray)]], 643 texture2d_ms<float> srcDepthTexture2dMS 644 [[texture(0), function_constant(kSourceTextureType2DMS)]], 645 texturecube<float> srcDepthTextureCube 646 [[texture(0), function_constant(kSourceTextureTypeCube)]], 647 648 649 texture2d<uint32_t> srcStencilTexture2d 650 [[texture(1), function_constant(kSourceTexture2Type2D)]], 651 texture2d_array<uint32_t> srcStencilTexture2dArray 652 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 653 texture2d_ms<uint32_t> srcStencilTexture2dMS 654 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 655 texturecube<uint32_t> srcStencilTextureCube 656 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 657 658 constant BlitParams &options [[buffer(0)]]) 659{ 660 FragmentDepthStencilOut re; 661 662 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 663 srcDepthTextureCube, input.texCoords, options); 664 re.stencil = 665 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 666 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 667 return re; 668} 669#endif 670# 2 "temp_master_source.metal" 2 671# 1 "./clear.metal" 1 672# 10 "./clear.metal" 673using namespace rx::mtl_shader; 674 675struct ClearParams 676{ 677 float4 clearColor; 678 float clearDepth; 679}; 680 681vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 682 constant ClearParams &clearParams [[buffer(0)]]) 683{ 684 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 685} 686 687fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 688{ 689 return toMultipleColorOutputs(clearParams.clearColor); 690} 691 692fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 693{ 694 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 695} 696 697fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 698{ 699 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 700} 701# 3 "temp_master_source.metal" 2 702# 1 "./gen_indices.metal" 1 703 704 705 706 707 708 709 710 711using namespace rx::mtl_shader; 712 713 714constant bool kSourceBufferAligned[[function_constant(100)]]; 715constant bool kSourceIndexIsU8[[function_constant(200)]]; 716constant bool kSourceIndexIsU16[[function_constant(300)]]; 717constant bool kSourceIndexIsU32[[function_constant(400)]]; 718constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 719constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 720constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 721constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 722 723struct IndexConversionParams 724{ 725 uint32_t srcOffset; 726 uint32_t indexCount; 727 bool primitiveRestartEnabled; 728}; 729 730 731 732inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 733{ 734 return inputAligned[offset / 2 + idx]; 735} 736inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 737{ 738 return inputAligned[offset / 4 + idx]; 739} 740inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 741{ 742 return input[offset + idx]; 743} 744inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 745{ 746 ushort inputLo = input[offset + 2 * idx]; 747 ushort inputHi = input[offset + 2 * idx + 1]; 748 749 return inputLo | (inputHi << 8); 750} 751inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 752{ 753 uint input0 = input[offset + 4 * idx]; 754 uint input1 = input[offset + 4 * idx + 1]; 755 uint input2 = input[offset + 4 * idx + 2]; 756 uint input3 = input[offset + 4 * idx + 3]; 757 758 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 759} 760 761kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 762 constant IndexConversionParams &options [[buffer(0)]], 763 constant uchar *input [[buffer(1)]], 764 device ushort *output [[buffer(2)]]) 765{ 766 if (idx >= options.indexCount) { return; }; 767 768 uchar value = getIndexAligned(input, options.srcOffset, idx); 769 770 if (options.primitiveRestartEnabled && value == 0xff) 771 { 772 output[idx] = 0xffff; 773 } 774 else 775 { 776 output[idx] = value; 777 } 778} 779 780kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 781 constant IndexConversionParams &options [[buffer(0)]], 782 constant uchar *input 783 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 784 constant ushort *inputAligned 785 [[buffer(1), function_constant(kSourceBufferAligned)]], 786 device ushort *output [[buffer(2)]]) 787{ 788 if (idx >= options.indexCount) { return; }; 789 790 ushort value; 791 if (kSourceBufferAligned) 792 { 793 value = getIndexAligned(inputAligned, options.srcOffset, idx); 794 } 795 else 796 { 797 value = getIndexUnalignedU16(input, options.srcOffset, idx); 798 } 799 output[idx] = value; 800} 801 802kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 803 constant IndexConversionParams &options [[buffer(0)]], 804 constant uchar *input 805 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 806 constant uint *inputAligned 807 [[buffer(1), function_constant(kSourceBufferAligned)]], 808 device uint *output [[buffer(2)]]) 809{ 810 if (idx >= options.indexCount) { return; }; 811 812 uint value; 813 if (kSourceBufferAligned) 814 { 815 value = getIndexAligned(inputAligned, options.srcOffset, idx); 816 } 817 else 818 { 819 value = getIndexUnalignedU32(input, options.srcOffset, idx); 820 } 821 output[idx] = value; 822} 823 824struct IndexFromArrayParams 825{ 826 uint firstVertex; 827 828 uint vertexCount; 829}; 830 831 832kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 833 constant IndexFromArrayParams &options [[buffer(0)]], 834 device uint *output [[buffer(2)]]) 835{ 836 if (idx >= options.vertexCount) { return; }; 837 838 uint vertexIdx = options.firstVertex + 2 + idx; 839 840 841 842 output[3 * idx ] = vertexIdx - 1; 843 output[3 * idx + 1] = vertexIdx; 844 output[3 * idx + 2] = options.firstVertex; 845} 846 847inline uint getIndexU32(uint offset, 848 uint idx, 849 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 850 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 851 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 852{ 853 if (kUseSourceBufferU8) 854 { 855 if (kSourceIndexIsU16) 856 { 857 return getIndexUnalignedU16(inputU8, offset, idx); 858 } 859 else if (kSourceIndexIsU32) 860 { 861 return getIndexUnalignedU32(inputU8, offset, idx); 862 } 863 return getIndexAligned(inputU8, offset, idx); 864 } 865 else if (kUseSourceBufferU16) 866 { 867 return getIndexAligned(inputU16, offset, idx); 868 } 869 else if (kUseSourceBufferU32) 870 { 871 return getIndexAligned(inputU32, offset, idx); 872 } 873 return 0; 874} 875 876 877 878 879kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 880 constant IndexConversionParams &options [[buffer(0)]], 881 constant uchar *inputU8 882 [[buffer(1), function_constant(kUseSourceBufferU8)]], 883 constant ushort *inputU16 884 [[buffer(1), function_constant(kUseSourceBufferU16)]], 885 constant uint *inputU32 886 [[buffer(1), function_constant(kUseSourceBufferU32)]], 887 device uint *output [[buffer(2)]]) 888{ 889 if (idx >= options.indexCount) { return; }; 890 891 uint elemIdx = 2 + idx; 892 893 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 894 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 895 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 896} 897 898 899kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 900 constant IndexFromArrayParams &options [[buffer(0)]], 901 device uint *output [[buffer(2)]]) 902{ 903 uint totalIndices = options.vertexCount + 1; 904 if (idx >= totalIndices) { return; }; 905 906 output[idx] = options.firstVertex + idx % options.vertexCount; 907} 908 909 910 911kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 912 constant IndexConversionParams &options [[buffer(0)]], 913 constant uchar *inputU8 914 [[buffer(1), function_constant(kUseSourceBufferU8)]], 915 constant ushort *inputU16 916 [[buffer(1), function_constant(kUseSourceBufferU16)]], 917 constant uint *inputU32 918 [[buffer(1), function_constant(kUseSourceBufferU32)]], 919 device uint *output [[buffer(2)]]) 920{ 921 uint totalTargetIndices = options.indexCount + 1; 922 if (idx >= totalTargetIndices) { return; }; 923 924 output[idx] = 925 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 926} 927# 4 "temp_master_source.metal" 2 928# 1 "./gen_mipmap.metal" 1 929 930 931 932 933 934 935 936 937using namespace rx::mtl_shader; 938# 31 "./gen_mipmap.metal" 939struct GenMipParams 940{ 941 uint srcLevel; 942 uint numMipLevelsToGen; 943 bool sRGB; 944}; 945 946 947 948kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 949 ushort3 gIndices [[thread_position_in_grid]], 950 texture3d<float> srcTexture [[texture(0)]], 951 texture3d<float, access::write> dstMip1 [[texture(1)]], 952 texture3d<float, access::write> dstMip2 [[texture(2)]], 953 texture3d<float, access::write> dstMip3 [[texture(3)]], 954 texture3d<float, access::write> dstMip4 [[texture(4)]], 955 constant GenMipParams &options [[buffer(0)]]) 956{ 957 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 958 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 959 960 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 961 962 963 964 965 threadgroup float sR[(8 * 8 * 8)]; 966 threadgroup float sG[(8 * 8 * 8)]; 967 threadgroup float sB[(8 * 8 * 8)]; 968 threadgroup float sA[(8 * 8 * 8)]; 969 970 971 float4 texel1; 972 if (validThread) 973 { 974 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 975 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 976 977 978 dstMip1.write(texel1, gIndices); 979 } 980 else 981 { 982 983 lIndex = 0xffffffff; 984 } 985 986 if (options.numMipLevelsToGen == 1) 987 { 988 return; 989 } 990 991 992 993 994 if (options.sRGB) 995 { 996 texel1 = linearToSRGB(texel1); 997 } 998 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 999 1000 threadgroup_barrier(mem_flags::mem_threadgroup); 1001 1002 1003 if ((lIndex & 0x49) == 0) 1004 { 1005 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 1006 1007 1008 1009 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1010 1011 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1012 1013 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 1014 1015 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1016 1017 1018 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 1019 1020 1021 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 1022 1023 1024 float4 texel8 = 1025 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 1026 1027 1028 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1029 1030 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1031 1032 1033 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1034 } 1035 1036 if (options.numMipLevelsToGen == 2) 1037 { 1038 return; 1039 } 1040 1041 1042 threadgroup_barrier(mem_flags::mem_threadgroup); 1043 1044 1045 if ((lIndex & 0xdb) == 0) 1046 { 1047 mipSize = max(mipSize >> 1, ushort3(1)); 1048 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 1049 1050 1051 1052 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1053 1054 float4 texel3 = 1055 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 1056 1057 float4 texel4 = 1058 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 1059 1060 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1061 1062 1063 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 1064 1065 1066 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 1067 1068 1069 1070 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 1071 1072 1073 1074 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1075 1076 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1077 1078 1079 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1080 } 1081 1082 if (options.numMipLevelsToGen == 3) 1083 { 1084 return; 1085 } 1086 1087 1088 threadgroup_barrier(mem_flags::mem_threadgroup); 1089 1090 1091 if ((lIndex & 0x1ff) == 0) 1092 { 1093 mipSize = max(mipSize >> 1, ushort3(1)); 1094 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 1095 1096 1097 1098 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1099 1100 float4 texel3 = 1101 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 1102 1103 float4 texel4 = 1104 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 1105 1106 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1107 1108 1109 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 1110 1111 1112 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 1113 1114 1115 1116 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 1117 1118 1119 1120 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1121 1122 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1123 } 1124} 1125 1126kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1127 ushort2 gIndices [[thread_position_in_grid]], 1128 texture2d<float> srcTexture [[texture(0)]], 1129 texture2d<float, access::write> dstMip1 [[texture(1)]], 1130 texture2d<float, access::write> dstMip2 [[texture(2)]], 1131 texture2d<float, access::write> dstMip3 [[texture(3)]], 1132 texture2d<float, access::write> dstMip4 [[texture(4)]], 1133 constant GenMipParams &options [[buffer(0)]]) 1134{ 1135 uint firstMipLevel = options.srcLevel + 1; 1136 ushort2 mipSize = 1137 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1138 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 1139 1140 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1141 1142 1143 1144 1145 threadgroup float sR[(8 * 8)]; 1146 threadgroup float sG[(8 * 8)]; 1147 threadgroup float sB[(8 * 8)]; 1148 threadgroup float sA[(8 * 8)]; 1149 1150 1151 float4 texel1; 1152 if (validThread) 1153 { 1154 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 1155 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 1156 1157 1158 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 1159 } 1160 else 1161 { 1162 1163 lIndex = 0xffffffff; 1164 } 1165 1166 if (options.numMipLevelsToGen == 1) 1167 { 1168 return; 1169 } 1170 1171 1172 1173 1174 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1175 1176 threadgroup_barrier(mem_flags::mem_threadgroup); 1177 1178 1179 if ((lIndex & 0x09) == 0) 1180 { 1181 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 1182 1183 1184 1185 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1186 1187 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1188 1189 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1190 1191 1192 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1193 1194 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1195 1196 1197 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1198 } 1199 1200 if (options.numMipLevelsToGen == 2) 1201 { 1202 return; 1203 } 1204 1205 1206 threadgroup_barrier(mem_flags::mem_threadgroup); 1207 1208 1209 if ((lIndex & 0x1b) == 0) 1210 { 1211 mipSize = max(mipSize >> 1, ushort2(1)); 1212 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 1213 1214 1215 1216 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1217 1218 float4 texel3 = 1219 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1220 1221 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1222 1223 1224 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1225 1226 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1227 1228 1229 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1230 } 1231 1232 if (options.numMipLevelsToGen == 3) 1233 { 1234 return; 1235 } 1236 1237 1238 threadgroup_barrier(mem_flags::mem_threadgroup); 1239 1240 1241 if ((lIndex & 0x3f) == 0) 1242 { 1243 mipSize = max(mipSize >> 1, ushort2(1)); 1244 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 1245 1246 1247 1248 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1249 1250 float4 texel3 = 1251 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1252 1253 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1254 1255 1256 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1257 1258 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1259 } 1260} 1261 1262template <typename TextureTypeR, typename TextureTypeW> 1263static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 1264 uint lIndex, 1265 ushort3 gIndices, 1266 TextureTypeR srcTexture, 1267 TextureTypeW dstMip2, 1268 TextureTypeW dstMip3, 1269 TextureTypeW dstMip4, 1270 ushort2 mip1Size, 1271 float4 mip1Texel, 1272 threadgroup float *sR, 1273 threadgroup float *sG, 1274 threadgroup float *sB, 1275 threadgroup float *sA, 1276 constant GenMipParams &options) 1277{ 1278 ushort2 mipSize = mip1Size; 1279 float4 texel1 = mip1Texel; 1280 1281 1282 1283 1284 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1285 1286 threadgroup_barrier(mem_flags::mem_threadgroup); 1287 1288 1289 if ((lIndex & 0x09) == 0) 1290 { 1291 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 1292 1293 1294 1295 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1296 1297 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1298 1299 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1300 1301 1302 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1303 1304 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 1305 1306 1307 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1308 } 1309 1310 if (options.numMipLevelsToGen == 2) 1311 { 1312 return; 1313 } 1314 1315 1316 threadgroup_barrier(mem_flags::mem_threadgroup); 1317 1318 1319 if ((lIndex & 0x1b) == 0) 1320 { 1321 mipSize = max(mipSize >> 1, ushort2(1)); 1322 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 1323 1324 1325 1326 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1327 1328 float4 texel3 = 1329 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1330 1331 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1332 1333 1334 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1335 1336 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 1337 1338 1339 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1340 } 1341 1342 if (options.numMipLevelsToGen == 3) 1343 { 1344 return; 1345 } 1346 1347 1348 threadgroup_barrier(mem_flags::mem_threadgroup); 1349 1350 1351 if ((lIndex & 0x3f) == 0) 1352 { 1353 mipSize = max(mipSize >> 1, ushort2(1)); 1354 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 1355 1356 1357 1358 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1359 1360 float4 texel3 = 1361 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1362 1363 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1364 1365 1366 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1367 1368 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 1369 } 1370} 1371 1372kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1373 ushort3 gIndices [[thread_position_in_grid]], 1374 texturecube<float> srcTexture [[texture(0)]], 1375 texturecube<float, access::write> dstMip1 [[texture(1)]], 1376 texturecube<float, access::write> dstMip2 [[texture(2)]], 1377 texturecube<float, access::write> dstMip3 [[texture(3)]], 1378 texturecube<float, access::write> dstMip4 [[texture(4)]], 1379 constant GenMipParams &options [[buffer(0)]]) 1380{ 1381 uint firstMipLevel = options.srcLevel + 1; 1382 ushort2 mip1Size = 1383 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1384 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1385 1386 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1387 1388 1389 float4 mip1Texel; 1390 if (validThread) 1391 { 1392 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1393 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1394 level(options.srcLevel)); 1395 1396 1397 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1398 } 1399 else 1400 { 1401 1402 lIndex = 0xffffffff; 1403 } 1404 1405 if (options.numMipLevelsToGen == 1) 1406 { 1407 return; 1408 } 1409 1410 1411 threadgroup float sR[(8 * 8)]; 1412 threadgroup float sG[(8 * 8)]; 1413 threadgroup float sB[(8 * 8)]; 1414 threadgroup float sA[(8 * 8)]; 1415 1416 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1417 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1418} 1419 1420kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1421 ushort3 gIndices [[thread_position_in_grid]], 1422 texture2d_array<float> srcTexture [[texture(0)]], 1423 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1424 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1425 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1426 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1427 constant GenMipParams &options [[buffer(0)]]) 1428{ 1429 uint firstMipLevel = options.srcLevel + 1; 1430 ushort2 mip1Size = 1431 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1432 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1433 1434 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1435 1436 1437 float4 mip1Texel; 1438 if (validThread) 1439 { 1440 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1441 mip1Texel = 1442 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1443 1444 1445 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1446 } 1447 else 1448 { 1449 1450 lIndex = 0xffffffff; 1451 } 1452 1453 if (options.numMipLevelsToGen == 1) 1454 { 1455 return; 1456 } 1457 1458 1459 threadgroup float sR[(8 * 8)]; 1460 threadgroup float sG[(8 * 8)]; 1461 threadgroup float sB[(8 * 8)]; 1462 threadgroup float sA[(8 * 8)]; 1463 1464 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1465 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1466} 1467# 5 "temp_master_source.metal" 2 1468# 1 "./copy_buffer.metal" 1 1469# 12 "./copy_buffer.metal" 1470#include <metal_pack> 1471 1472 1473# 1 "./format_autogen.h" 1 1474# 11 "./format_autogen.h" 1475namespace rx 1476{ 1477namespace mtl_shader 1478{ 1479 1480namespace FormatID 1481{ 1482enum 1483{ 1484 NONE, 1485 D16_UNORM, 1486 D24_UNORM_S8_UINT, 1487 D24_UNORM_X8_UINT, 1488 D32_FLOAT, 1489 D32_FLOAT_S8X24_UINT, 1490 D32_UNORM, 1491 S8_UINT, 1492 A16_FLOAT, 1493 A1R5G5B5_UNORM, 1494 A2R10G10B10_SINT_VERTEX, 1495 A2R10G10B10_SNORM_VERTEX, 1496 A2R10G10B10_SSCALED_VERTEX, 1497 A2R10G10B10_UINT_VERTEX, 1498 A2R10G10B10_UNORM_VERTEX, 1499 A2R10G10B10_USCALED_VERTEX, 1500 A32_FLOAT, 1501 A8_UNORM, 1502 ASTC_10x10_SRGB_BLOCK, 1503 ASTC_10x10_UNORM_BLOCK, 1504 ASTC_10x5_SRGB_BLOCK, 1505 ASTC_10x5_UNORM_BLOCK, 1506 ASTC_10x6_SRGB_BLOCK, 1507 ASTC_10x6_UNORM_BLOCK, 1508 ASTC_10x8_SRGB_BLOCK, 1509 ASTC_10x8_UNORM_BLOCK, 1510 ASTC_12x10_SRGB_BLOCK, 1511 ASTC_12x10_UNORM_BLOCK, 1512 ASTC_12x12_SRGB_BLOCK, 1513 ASTC_12x12_UNORM_BLOCK, 1514 ASTC_3x3x3_UNORM_BLOCK, 1515 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1516 ASTC_4x3x3_UNORM_BLOCK, 1517 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1518 ASTC_4x4_SRGB_BLOCK, 1519 ASTC_4x4_UNORM_BLOCK, 1520 ASTC_4x4x3_UNORM_BLOCK, 1521 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1522 ASTC_4x4x4_UNORM_BLOCK, 1523 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1524 ASTC_5x4_SRGB_BLOCK, 1525 ASTC_5x4_UNORM_BLOCK, 1526 ASTC_5x4x4_UNORM_BLOCK, 1527 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1528 ASTC_5x5_SRGB_BLOCK, 1529 ASTC_5x5_UNORM_BLOCK, 1530 ASTC_5x5x4_UNORM_BLOCK, 1531 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1532 ASTC_5x5x5_UNORM_BLOCK, 1533 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1534 ASTC_6x5_SRGB_BLOCK, 1535 ASTC_6x5_UNORM_BLOCK, 1536 ASTC_6x5x5_UNORM_BLOCK, 1537 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1538 ASTC_6x6_SRGB_BLOCK, 1539 ASTC_6x6_UNORM_BLOCK, 1540 ASTC_6x6x5_UNORM_BLOCK, 1541 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1542 ASTC_6x6x6_UNORM_BLOCK, 1543 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1544 ASTC_8x5_SRGB_BLOCK, 1545 ASTC_8x5_UNORM_BLOCK, 1546 ASTC_8x6_SRGB_BLOCK, 1547 ASTC_8x6_UNORM_BLOCK, 1548 ASTC_8x8_SRGB_BLOCK, 1549 ASTC_8x8_UNORM_BLOCK, 1550 B10G10R10A2_UNORM, 1551 B4G4R4A4_UNORM, 1552 B5G5R5A1_UNORM, 1553 B5G6R5_UNORM, 1554 B8G8R8A8_TYPELESS, 1555 B8G8R8A8_TYPELESS_SRGB, 1556 B8G8R8A8_UNORM, 1557 B8G8R8A8_UNORM_SRGB, 1558 B8G8R8X8_UNORM, 1559 BC1_RGBA_UNORM_BLOCK, 1560 BC1_RGBA_UNORM_SRGB_BLOCK, 1561 BC1_RGB_UNORM_BLOCK, 1562 BC1_RGB_UNORM_SRGB_BLOCK, 1563 BC2_RGBA_UNORM_BLOCK, 1564 BC2_RGBA_UNORM_SRGB_BLOCK, 1565 BC3_RGBA_UNORM_BLOCK, 1566 BC3_RGBA_UNORM_SRGB_BLOCK, 1567 BC4_RED_SNORM_BLOCK, 1568 BC4_RED_UNORM_BLOCK, 1569 BC5_RG_SNORM_BLOCK, 1570 BC5_RG_UNORM_BLOCK, 1571 BC6H_RGB_SFLOAT_BLOCK, 1572 BC6H_RGB_UFLOAT_BLOCK, 1573 BC7_RGBA_UNORM_BLOCK, 1574 BC7_RGBA_UNORM_SRGB_BLOCK, 1575 EAC_R11G11_SNORM_BLOCK, 1576 EAC_R11G11_UNORM_BLOCK, 1577 EAC_R11_SNORM_BLOCK, 1578 EAC_R11_UNORM_BLOCK, 1579 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1580 ETC1_R8G8B8_UNORM_BLOCK, 1581 ETC2_R8G8B8A1_SRGB_BLOCK, 1582 ETC2_R8G8B8A1_UNORM_BLOCK, 1583 ETC2_R8G8B8A8_SRGB_BLOCK, 1584 ETC2_R8G8B8A8_UNORM_BLOCK, 1585 ETC2_R8G8B8_SRGB_BLOCK, 1586 ETC2_R8G8B8_UNORM_BLOCK, 1587 G8_B8R8_2PLANE_420_UNORM, 1588 G8_B8_R8_3PLANE_420_UNORM, 1589 L16A16_FLOAT, 1590 L16_FLOAT, 1591 L32A32_FLOAT, 1592 L32_FLOAT, 1593 L8A8_UNORM, 1594 L8_UNORM, 1595 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1596 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1597 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1598 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1599 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1600 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1601 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1602 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1603 R10G10B10A2_SINT, 1604 R10G10B10A2_SNORM, 1605 R10G10B10A2_SSCALED, 1606 R10G10B10A2_UINT, 1607 R10G10B10A2_UNORM, 1608 R10G10B10A2_USCALED, 1609 R10G10B10X2_UNORM, 1610 R11G11B10_FLOAT, 1611 R16G16B16A16_FLOAT, 1612 R16G16B16A16_SINT, 1613 R16G16B16A16_SNORM, 1614 R16G16B16A16_SSCALED, 1615 R16G16B16A16_UINT, 1616 R16G16B16A16_UNORM, 1617 R16G16B16A16_USCALED, 1618 R16G16B16_FLOAT, 1619 R16G16B16_SINT, 1620 R16G16B16_SNORM, 1621 R16G16B16_SSCALED, 1622 R16G16B16_UINT, 1623 R16G16B16_UNORM, 1624 R16G16B16_USCALED, 1625 R16G16_FLOAT, 1626 R16G16_SINT, 1627 R16G16_SNORM, 1628 R16G16_SSCALED, 1629 R16G16_UINT, 1630 R16G16_UNORM, 1631 R16G16_USCALED, 1632 R16_FLOAT, 1633 R16_SINT, 1634 R16_SNORM, 1635 R16_SSCALED, 1636 R16_UINT, 1637 R16_UNORM, 1638 R16_USCALED, 1639 R32G32B32A32_FIXED, 1640 R32G32B32A32_FLOAT, 1641 R32G32B32A32_SINT, 1642 R32G32B32A32_SNORM, 1643 R32G32B32A32_SSCALED, 1644 R32G32B32A32_UINT, 1645 R32G32B32A32_UNORM, 1646 R32G32B32A32_USCALED, 1647 R32G32B32_FIXED, 1648 R32G32B32_FLOAT, 1649 R32G32B32_SINT, 1650 R32G32B32_SNORM, 1651 R32G32B32_SSCALED, 1652 R32G32B32_UINT, 1653 R32G32B32_UNORM, 1654 R32G32B32_USCALED, 1655 R32G32_FIXED, 1656 R32G32_FLOAT, 1657 R32G32_SINT, 1658 R32G32_SNORM, 1659 R32G32_SSCALED, 1660 R32G32_UINT, 1661 R32G32_UNORM, 1662 R32G32_USCALED, 1663 R32_FIXED, 1664 R32_FLOAT, 1665 R32_SINT, 1666 R32_SNORM, 1667 R32_SSCALED, 1668 R32_UINT, 1669 R32_UNORM, 1670 R32_USCALED, 1671 R4G4B4A4_UNORM, 1672 R5G5B5A1_UNORM, 1673 R5G6B5_UNORM, 1674 R8G8B8A8_SINT, 1675 R8G8B8A8_SNORM, 1676 R8G8B8A8_SSCALED, 1677 R8G8B8A8_TYPELESS, 1678 R8G8B8A8_TYPELESS_SRGB, 1679 R8G8B8A8_UINT, 1680 R8G8B8A8_UNORM, 1681 R8G8B8A8_UNORM_SRGB, 1682 R8G8B8A8_USCALED, 1683 R8G8B8X8_UNORM, 1684 R8G8B8_SINT, 1685 R8G8B8_SNORM, 1686 R8G8B8_SSCALED, 1687 R8G8B8_UINT, 1688 R8G8B8_UNORM, 1689 R8G8B8_UNORM_SRGB, 1690 R8G8B8_USCALED, 1691 R8G8_SINT, 1692 R8G8_SNORM, 1693 R8G8_SSCALED, 1694 R8G8_UINT, 1695 R8G8_UNORM, 1696 R8G8_UNORM_SRGB, 1697 R8G8_USCALED, 1698 R8_SINT, 1699 R8_SNORM, 1700 R8_SSCALED, 1701 R8_UINT, 1702 R8_UNORM, 1703 R8_UNORM_SRGB, 1704 R8_USCALED, 1705 R9G9B9E5_SHAREDEXP, 1706 X2R10G10B10_SINT_VERTEX, 1707 X2R10G10B10_SNORM_VERTEX, 1708 X2R10G10B10_SSCALED_VERTEX, 1709 X2R10G10B10_UINT_VERTEX, 1710 X2R10G10B10_UNORM_VERTEX, 1711 X2R10G10B10_USCALED_VERTEX 1712}; 1713 1714} 1715 1716} 1717} 1718# 16 "./copy_buffer.metal" 2 1719 1720using namespace rx::mtl_shader; 1721 1722constant int kCopyFormatType [[function_constant(10)]]; 1723 1724 1725constant int kCopyTextureType [[function_constant(20)]]; 1726constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1727constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1728constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1729constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1730constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1731 1732struct CopyPixelParams 1733{ 1734 uint3 copySize; 1735 uint3 textureOffset; 1736 1737 uint bufferStartOffset; 1738 uint pixelSize; 1739 uint bufferRowPitch; 1740 uint bufferDepthPitch; 1741}; 1742 1743struct WritePixelParams 1744{ 1745 uint2 copySize; 1746 uint2 textureOffset; 1747 1748 uint bufferStartOffset; 1749 1750 uint pixelSize; 1751 uint bufferRowPitch; 1752 1753 uint textureLevel; 1754 uint textureLayer; 1755 1756 bool reverseTextureRowOrder; 1757}; 1758# 120 "./copy_buffer.metal" 1759template <typename T> 1760static inline void textureWrite(ushort3 gIndices, 1761 constant CopyPixelParams &options, 1762 vec<T, 4> color, 1763 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1764{ 1765 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1766 switch (kCopyTextureType) 1767 { 1768 case kTextureType2D: 1769 dstTexture2d.write(color, writeIndices.xy); 1770 break; 1771 case kTextureType2DArray: 1772 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1773 break; 1774 case kTextureType3D: 1775 dstTexture3d.write(color, writeIndices); 1776 break; 1777 case kTextureTypeCube: 1778 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1779 break; 1780 } 1781} 1782 1783 1784template <typename T> 1785static inline vec<T, 4> textureRead(ushort2 gIndices, 1786 constant WritePixelParams &options, 1787 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1788{ 1789 vec<T, 4> color; 1790 uint2 coords = uint2(gIndices); 1791 if (options.reverseTextureRowOrder) 1792 { 1793 coords.y = options.copySize.y - 1 - gIndices.y; 1794 } 1795 coords += options.textureOffset; 1796 switch (kCopyTextureType) 1797 { 1798 case kTextureType2D: 1799 color = srcTexture2d.read(coords.xy, options.textureLevel); 1800 break; 1801 case kTextureType2DArray: 1802 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1803 break; 1804 case kTextureType2DMultisample: 1805 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1806 break; 1807 case kTextureType3D: 1808 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1809 break; 1810 case kTextureTypeCube: 1811 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1812 break; 1813 } 1814 return color; 1815} 1816# 215 "./copy_buffer.metal" 1817static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1818{ 1819 float4 color; 1820 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1821 1822 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1823 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1824 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1825 color.a = 1.0; 1826 return color; 1827} 1828static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1829{ 1830 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1831 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1832 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1833 1834 shortToBytes(dst, bufferOffset, buffer); 1835} 1836 1837 1838static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1839{ 1840 float4 color; 1841 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1842 1843 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1844 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1845 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1846 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1847 return color; 1848} 1849static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1850{ 1851 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1852 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1853 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1854 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1855 ; 1856 1857 shortToBytes(dst, bufferOffset, buffer); 1858} 1859 1860 1861static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1862{ 1863 float4 color; 1864 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1865 1866 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1867 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1868 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1869 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1870 return color; 1871} 1872static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1873{ 1874 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1875 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1876 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1877 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1878 ; 1879 1880 shortToBytes(dst, bufferOffset, buffer); 1881} 1882 1883 1884static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1885{ 1886 int4 color; 1887 int src = bytesToInt<int>(buffer, bufferOffset); 1888 1889 constexpr int3 rgbSignMask(0x200); 1890 constexpr int3 negativeMask(0xfffffc00); 1891 constexpr int alphaSignMask = 0x2; 1892 constexpr int alphaNegMask = 0xfffffffc; 1893 1894 color.r = getShiftedData<10, 0>(src); 1895 color.g = getShiftedData<10, 10>(src); 1896 color.b = getShiftedData<10, 20>(src); 1897 1898 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1899 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1900 1901 color.a = getShiftedData<2, 30>(src); 1902 int isAlphaNegative = color.a & alphaSignMask >> 1; 1903 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1904 return color; 1905} 1906 1907static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1908{ 1909 uint4 color; 1910 uint src = bytesToInt<uint>(buffer, bufferOffset); 1911 1912 color.r = getShiftedData<10, 0>(src); 1913 color.g = getShiftedData<10, 10>(src); 1914 color.b = getShiftedData<10, 20>(src); 1915 color.a = getShiftedData<2, 30>(src); 1916 return color; 1917} 1918 1919 1920static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1921{ 1922 float4 color; 1923 uint src = bytesToInt<uint>(buffer, bufferOffset); 1924 1925 if (isSRGB) 1926 { 1927 color = unpack_unorm4x8_srgb_to_float(src); 1928 } 1929 else 1930 { 1931 color = unpack_unorm4x8_to_float(src); 1932 } 1933 return color; 1934} 1935static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1936{ 1937 uint dst; 1938 1939 if (isSRGB) 1940 { 1941 dst = pack_float_to_srgb_unorm4x8(color); 1942 } 1943 else 1944 { 1945 dst = pack_float_to_unorm4x8(color); 1946 } 1947 1948 intToBytes(dst, bufferOffset, buffer); 1949} 1950 1951static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1952{ 1953 float4 color; 1954 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1955 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1956 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1957 color.a = 1.0; 1958 1959 if (isSRGB) 1960 { 1961 color = sRGBtoLinear(color); 1962 } 1963 return color; 1964} 1965static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1966{ 1967 color.a = 1.0; 1968 uint dst; 1969 1970 if (isSRGB) 1971 { 1972 dst = pack_float_to_srgb_unorm4x8(color); 1973 } 1974 else 1975 { 1976 dst = pack_float_to_unorm4x8(color); 1977 } 1978 int24bitToBytes(dst, bufferOffset, buffer); 1979} 1980 1981 1982static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 1983{ 1984 float4 color; 1985 uint src = bytesToInt<uint>(buffer, bufferOffset); 1986 1987 color = unpack_snorm4x8_to_float(src); 1988 1989 return color; 1990} 1991static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1992{ 1993 uint dst = pack_float_to_snorm4x8(color); 1994 1995 intToBytes(dst, bufferOffset, buffer); 1996} 1997 1998 1999static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 2000{ 2001 float4 color; 2002 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2003 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2004 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 2005 color.a = 1.0; 2006 2007 return color; 2008} 2009static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2010{ 2011 uint dst = pack_float_to_snorm4x8(color); 2012 2013 int24bitToBytes(dst, bufferOffset, buffer); 2014} 2015 2016 2017static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2018{ 2019 return readR8G8B8A8(bufferOffset, buffer, false); 2020} 2021static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2022{ 2023 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2024} 2025 2026static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2027{ 2028 return readR8G8B8A8(bufferOffset, buffer, true); 2029} 2030static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2031{ 2032 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2033} 2034 2035 2036static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2037{ 2038 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 2039} 2040static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2041{ 2042 color.rgba = color.bgra; 2043 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2044} 2045 2046static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2047{ 2048 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 2049} 2050static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2051{ 2052 color.rgba = color.bgra; 2053 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2054} 2055 2056 2057static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 2058{ 2059 return readR8G8B8(bufferOffset, buffer, false); 2060} 2061static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2062{ 2063 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 2064} 2065 2066static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2067{ 2068 return readR8G8B8(bufferOffset, buffer, true); 2069} 2070static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2071{ 2072 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 2073} 2074 2075 2076static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 2077{ 2078 float4 color; 2079 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2080 color.a = 1.0; 2081 return color; 2082} 2083static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2084{ 2085 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2086} 2087 2088 2089static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2090{ 2091 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 2092} 2093 2094 2095static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2096{ 2097 float4 color; 2098 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2099 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2100 return color; 2101} 2102static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2103{ 2104 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2105 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 2106} 2107 2108 2109static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 2110{ 2111 float4 color; 2112 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2113 color.g = color.b = 0.0; 2114 color.a = 1.0; 2115 return color; 2116} 2117static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2118{ 2119 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2120} 2121 2122static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 2123{ 2124 float4 color; 2125 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2126 color.g = color.b = 0.0; 2127 color.a = 1.0; 2128 return color; 2129} 2130static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2131{ 2132 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); 2133} 2134 2135 2136static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 2137{ 2138 int4 color; 2139 color.r = as_type<char>(buffer[bufferOffset]); 2140 color.g = color.b = 0; 2141 color.a = 1; 2142 return color; 2143} 2144static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2145{ 2146 buffer[bufferOffset] = static_cast<uchar>(color.r); 2147} 2148 2149 2150static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 2151{ 2152 uint4 color; 2153 color.r = as_type<uchar>(buffer[bufferOffset]); 2154 color.g = color.b = 0; 2155 color.a = 1; 2156 return color; 2157} 2158static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2159{ 2160 buffer[bufferOffset] = static_cast<uchar>(color.r); 2161} 2162 2163 2164static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 2165{ 2166 float4 color; 2167 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2168 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2169 color.b = 0.0; 2170 color.a = 1.0; 2171 return color; 2172} 2173static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2174{ 2175 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2176 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 2177} 2178 2179static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 2180{ 2181 float4 color; 2182 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2183 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2184 color.b = 0.0; 2185 color.a = 1.0; 2186 return color; 2187} 2188static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2189{ 2190 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); 2191 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g)); 2192} 2193 2194 2195static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 2196{ 2197 int4 color; 2198 color.r = as_type<char>(buffer[bufferOffset]); 2199 color.g = as_type<char>(buffer[bufferOffset + 1]); 2200 color.b = 0; 2201 color.a = 1; 2202 return color; 2203} 2204static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2205{ 2206 buffer[bufferOffset] = static_cast<uchar>(color.r); 2207 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2208} 2209 2210 2211static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 2212{ 2213 uint4 color; 2214 color.r = as_type<uchar>(buffer[bufferOffset]); 2215 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2216 color.b = 0; 2217 color.a = 1; 2218 return color; 2219} 2220static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2221{ 2222 buffer[bufferOffset] = static_cast<uchar>(color.r); 2223 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2224} 2225 2226 2227static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 2228{ 2229 int4 color; 2230 color.r = as_type<char>(buffer[bufferOffset]); 2231 color.g = as_type<char>(buffer[bufferOffset + 1]); 2232 color.b = as_type<char>(buffer[bufferOffset + 2]); 2233 color.a = 1; 2234 return color; 2235} 2236 2237 2238static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 2239{ 2240 uint4 color; 2241 color.r = as_type<uchar>(buffer[bufferOffset]); 2242 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2243 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2244 color.a = 1; 2245 return color; 2246} 2247 2248 2249static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 2250{ 2251 int4 color; 2252 color.r = as_type<char>(buffer[bufferOffset]); 2253 color.g = as_type<char>(buffer[bufferOffset + 1]); 2254 color.b = as_type<char>(buffer[bufferOffset + 2]); 2255 color.a = as_type<char>(buffer[bufferOffset + 3]); 2256 return color; 2257} 2258static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2259{ 2260 buffer[bufferOffset] = static_cast<uchar>(color.r); 2261 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2262 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2263 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2264} 2265 2266 2267static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 2268{ 2269 uint4 color; 2270 color.r = as_type<uchar>(buffer[bufferOffset]); 2271 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2272 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2273 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 2274 return color; 2275} 2276static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2277{ 2278 buffer[bufferOffset] = static_cast<uchar>(color.r); 2279 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2280 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2281 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2282} 2283 2284 2285static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 2286{ 2287 float4 color; 2288 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2289 color.g = color.b = 0.0; 2290 color.a = 1.0; 2291 return color; 2292} 2293static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2294{ 2295 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2296} 2297 2298template <typename ShortType> 2299static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 2300{ 2301 float4 color; 2302 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2303 color.g = color.b = 0.0; 2304 color.a = 1.0; 2305 return color; 2306} 2307 2308 2309 2310 2311static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 2312{ 2313 int4 color; 2314 color.r = bytesToShort<short>(buffer, bufferOffset); 2315 color.g = color.b = 0; 2316 color.a = 1; 2317 return color; 2318} 2319static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2320{ 2321 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2322} 2323 2324 2325static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 2326{ 2327 uint4 color; 2328 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2329 color.g = color.b = 0; 2330 color.a = 1; 2331 return color; 2332} 2333static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2334{ 2335 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2336} 2337 2338 2339static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 2340{ 2341 float4 color; 2342 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2343 color.rgb = 0.0; 2344 return color; 2345} 2346static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2347{ 2348 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 2349} 2350 2351 2352static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 2353{ 2354 float4 color; 2355 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2356 color.a = 1.0; 2357 return color; 2358} 2359static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2360{ 2361 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2362} 2363 2364 2365static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2366{ 2367 float4 color; 2368 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2369 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2370 return color; 2371} 2372static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2373{ 2374 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2375 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 2376} 2377 2378 2379static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 2380{ 2381 float4 color; 2382 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2383 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2384 color.b = 0.0; 2385 color.a = 1.0; 2386 return color; 2387} 2388static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2389{ 2390 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2391 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2392} 2393 2394 2395template <typename ShortType> 2396static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 2397{ 2398 float4 color; 2399 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2400 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2401 color.b = 0.0; 2402 color.a = 1.0; 2403 return color; 2404} 2405 2406 2407 2408 2409static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 2410{ 2411 int4 color; 2412 color.r = bytesToShort<short>(buffer, bufferOffset); 2413 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2414 color.b = 0; 2415 color.a = 1; 2416 return color; 2417} 2418static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2419{ 2420 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2421 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2422} 2423 2424 2425static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 2426{ 2427 uint4 color; 2428 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2429 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2430 color.b = 0; 2431 color.a = 1; 2432 return color; 2433} 2434static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2435{ 2436 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2437 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2438} 2439 2440 2441static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 2442{ 2443 float4 color; 2444 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2445 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2446 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2447 color.a = 1.0; 2448 return color; 2449} 2450 2451 2452template <typename ShortType> 2453static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 2454{ 2455 float4 color; 2456 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2457 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2458 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2459 color.a = 1.0; 2460 return color; 2461} 2462 2463 2464 2465static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 2466{ 2467 int4 color; 2468 color.r = bytesToShort<short>(buffer, bufferOffset); 2469 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2470 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2471 color.a = 1; 2472 return color; 2473} 2474 2475 2476static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 2477{ 2478 uint4 color; 2479 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2480 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2481 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2482 color.a = 1; 2483 return color; 2484} 2485 2486 2487static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2488{ 2489 float4 color; 2490 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2491 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2492 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2493 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 2494 return color; 2495} 2496static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2497{ 2498 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2499 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2500 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 2501 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 2502} 2503 2504 2505template <typename ShortType> 2506static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2507{ 2508 float4 color; 2509 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2510 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2511 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2512 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2513 return color; 2514} 2515 2516 2517 2518 2519static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2520{ 2521 int4 color; 2522 color.r = bytesToShort<short>(buffer, bufferOffset); 2523 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2524 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2525 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2526 return color; 2527} 2528static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2529{ 2530 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2531 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2532 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2533 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2534} 2535 2536 2537static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2538{ 2539 uint4 color; 2540 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2541 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2542 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2543 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2544 return color; 2545} 2546static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2547{ 2548 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2549 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2550 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2551 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2552} 2553 2554 2555static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2556{ 2557 float4 color; 2558 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2559 color.g = color.b = 0.0; 2560 color.a = 1.0; 2561 return color; 2562} 2563static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2564{ 2565 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2566} 2567 2568 2569template <typename IntType> 2570static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2571{ 2572 float4 color; 2573 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2574 color.g = color.b = 0.0; 2575 color.a = 1.0; 2576 return color; 2577} 2578 2579 2580 2581 2582static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2583{ 2584 float4 color; 2585 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2586 color.rgb = 0.0; 2587 return color; 2588} 2589static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2590{ 2591 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2592} 2593 2594 2595static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2596{ 2597 float4 color; 2598 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2599 color.a = 1.0; 2600 return color; 2601} 2602static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2603{ 2604 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2605} 2606 2607 2608static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2609{ 2610 int4 color; 2611 color.r = bytesToInt<int>(buffer, bufferOffset); 2612 color.g = color.b = 0; 2613 color.a = 1; 2614 return color; 2615} 2616static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2617{ 2618 intToBytes(color.r, bufferOffset, buffer); 2619} 2620 2621 2622static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2623{ 2624 float4 color; 2625 constexpr float kDivisor = 1.0f / (1 << 16); 2626 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2627 color.g = color.b = 0.0; 2628 color.a = 1.0; 2629 return color; 2630} 2631 2632 2633static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2634{ 2635 uint4 color; 2636 color.r = bytesToInt<uint>(buffer, bufferOffset); 2637 color.g = color.b = 0; 2638 color.a = 1; 2639 return color; 2640} 2641static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2642{ 2643 intToBytes(color.r, bufferOffset, buffer); 2644} 2645 2646 2647static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2648{ 2649 float4 color; 2650 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2651 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2652 return color; 2653} 2654static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2655{ 2656 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2657 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2658} 2659 2660 2661static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2662{ 2663 float4 color; 2664 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2665 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2666 color.b = 0.0; 2667 color.a = 1.0; 2668 return color; 2669} 2670static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2671{ 2672 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2673 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2674} 2675 2676 2677template <typename IntType> 2678static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2679{ 2680 float4 color; 2681 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2682 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2683 color.b = 0.0; 2684 color.a = 1.0; 2685 return color; 2686} 2687 2688 2689 2690 2691static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2692{ 2693 int4 color; 2694 color.r = bytesToInt<int>(buffer, bufferOffset); 2695 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2696 color.b = 0; 2697 color.a = 1; 2698 return color; 2699} 2700static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2701{ 2702 intToBytes(color.r, bufferOffset, buffer); 2703 intToBytes(color.g, bufferOffset + 4, buffer); 2704} 2705 2706 2707static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2708{ 2709 float4 color; 2710 constexpr float kDivisor = 1.0f / (1 << 16); 2711 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2712 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2713 color.b = 0.0; 2714 color.a = 1.0; 2715 return color; 2716} 2717 2718 2719static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2720{ 2721 uint4 color; 2722 color.r = bytesToInt<uint>(buffer, bufferOffset); 2723 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2724 color.b = 0; 2725 color.a = 1; 2726 return color; 2727} 2728static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2729{ 2730 intToBytes(color.r, bufferOffset, buffer); 2731 intToBytes(color.g, bufferOffset + 4, buffer); 2732} 2733 2734 2735static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2736{ 2737 float4 color; 2738 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2739 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2740 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2741 color.a = 1.0; 2742 return color; 2743} 2744 2745 2746template <typename IntType> 2747static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2748{ 2749 float4 color; 2750 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2751 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2752 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2753 color.a = 1.0; 2754 return color; 2755} 2756 2757 2758 2759 2760static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2761{ 2762 int4 color; 2763 color.r = bytesToInt<int>(buffer, bufferOffset); 2764 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2765 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2766 color.a = 1; 2767 return color; 2768} 2769 2770 2771static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2772{ 2773 float4 color; 2774 constexpr float kDivisor = 1.0f / (1 << 16); 2775 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2776 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2777 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2778 color.a = 1.0; 2779 return color; 2780} 2781 2782 2783static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2784{ 2785 uint4 color; 2786 color.r = bytesToInt<uint>(buffer, bufferOffset); 2787 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2788 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2789 color.a = 1; 2790 return color; 2791} 2792 2793 2794static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2795{ 2796 float4 color; 2797 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2798 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2799 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2800 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2801 return color; 2802} 2803static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2804{ 2805 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2806 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2807 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2808 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2809} 2810 2811 2812template <typename IntType> 2813static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2814{ 2815 float4 color; 2816 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2817 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2818 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2819 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2820 return color; 2821} 2822 2823 2824 2825 2826static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2827{ 2828 int4 color; 2829 color.r = bytesToInt<int>(buffer, bufferOffset); 2830 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2831 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2832 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2833 return color; 2834} 2835static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2836{ 2837 intToBytes(color.r, bufferOffset, buffer); 2838 intToBytes(color.g, bufferOffset + 4, buffer); 2839 intToBytes(color.b, bufferOffset + 8, buffer); 2840 intToBytes(color.a, bufferOffset + 12, buffer); 2841} 2842 2843static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2844{ 2845 float4 color; 2846 constexpr float kDivisor = 1.0f / (1 << 16); 2847 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2848 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2849 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2850 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2851 return color; 2852} 2853 2854 2855static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2856{ 2857 uint4 color; 2858 color.r = bytesToInt<uint>(buffer, bufferOffset); 2859 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2860 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2861 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2862 return color; 2863} 2864static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2865{ 2866 intToBytes(color.r, bufferOffset, buffer); 2867 intToBytes(color.g, bufferOffset + 4, buffer); 2868 intToBytes(color.b, bufferOffset + 8, buffer); 2869 intToBytes(color.a, bufferOffset + 12, buffer); 2870} 2871# 1292 "./copy_buffer.metal" 2872static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2873static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2874static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2875 2876static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2877 2878 2879kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2880{ 2881 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2882# 1336 "./copy_buffer.metal" 2883 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2884 2885 switch (kCopyFormatType) 2886 { 2887 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2888 } 2889 2890 2891} 2892 2893kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2894{ 2895 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2896# 1364 "./copy_buffer.metal" 2897 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2898 2899 switch (kCopyFormatType) 2900 { 2901 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2902 } 2903 2904 2905} 2906 2907kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2908{ 2909 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2910# 1392 "./copy_buffer.metal" 2911 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2912 2913 switch (kCopyFormatType) 2914 { 2915 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2916 } 2917 2918 2919} 2920 2921 2922kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2923{ 2924 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2925# 1439 "./copy_buffer.metal" 2926 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2927 2928 switch (kCopyFormatType) 2929 { 2930 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2931 } 2932 2933 2934} 2935 2936kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2937{ 2938 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2939# 1464 "./copy_buffer.metal" 2940 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2941 2942 switch (kCopyFormatType) 2943 { 2944 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 2945 } 2946 2947 2948} 2949 2950kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2951{ 2952 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2953# 1489 "./copy_buffer.metal" 2954 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2955 2956 switch (kCopyFormatType) 2957 { 2958 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 2959 } 2960 2961 2962} 2963 2964 2965struct CopyVertexParams 2966{ 2967 uint srcBufferStartOffset; 2968 uint srcStride; 2969 uint srcComponentBytes; 2970 uint srcComponents; 2971 2972 2973 2974 uchar4 srcDefaultAlphaData; 2975 2976 uint dstBufferStartOffset; 2977 uint dstStride; 2978 uint dstComponents; 2979 2980 uint vertexCount; 2981}; 2982# 1539 "./copy_buffer.metal" 2983template <typename IntType> 2984static inline void writeFloatVertex(constant CopyVertexParams &options, 2985 uint idx, 2986 vec<IntType, 4> data, 2987 device uchar *dst) 2988{ 2989 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 2990 2991 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 2992 { 2993 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 2994 } 2995} 2996 2997template <> 2998inline void writeFloatVertex(constant CopyVertexParams &options, 2999 uint idx, 3000 vec<float, 4> data, 3001 device uchar *dst) 3002{ 3003 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3004 3005 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3006 { 3007 floatToBytes(data[component], dstOffset, dst); 3008 } 3009} 3010 3011 3012static inline void convertToFloatVertexFormat(uint index, 3013 constant CopyVertexParams &options, 3014 constant uchar *srcBuffer, 3015 device uchar *dstBuffer) 3016{ 3017# 1585 "./copy_buffer.metal" 3018 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 3019# 1594 "./copy_buffer.metal" 3020 switch (kCopyFormatType) 3021 { 3022 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 3023 } 3024 3025 3026} 3027 3028 3029kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 3030 constant CopyVertexParams &options [[buffer(0)]], 3031 constant uchar *srcBuffer [[buffer(1)]], 3032 device uchar *dstBuffer [[buffer(2)]]) 3033{ 3034 if (index >= options.vertexCount) { return; }; 3035 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3036} 3037 3038 3039vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 3040 constant CopyVertexParams &options [[buffer(0)]], 3041 constant uchar *srcBuffer [[buffer(1)]], 3042 device uchar *dstBuffer [[buffer(2)]]) 3043{ 3044 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3045} 3046 3047 3048static inline void expandVertexFormatComponents(uint index, 3049 constant CopyVertexParams &options, 3050 constant uchar *srcBuffer, 3051 device uchar *dstBuffer) 3052{ 3053 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 3054 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 3055 3056 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 3057 uint component; 3058 for (component = 0; component < options.srcComponents; ++component, 3059 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 3060 { 3061 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3062 { 3063 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 3064 } 3065 } 3066 3067 for (; component < dstComponentsBeforeAlpha; 3068 ++component, dstOffset += options.srcComponentBytes) 3069 { 3070 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3071 { 3072 dstBuffer[dstOffset + byte] = 0; 3073 } 3074 } 3075 3076 if (component < options.dstComponents) 3077 { 3078 3079 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3080 { 3081 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 3082 } 3083 } 3084} 3085 3086 3087kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 3088 constant CopyVertexParams &options [[buffer(0)]], 3089 constant uchar *srcBuffer [[buffer(1)]], 3090 device uchar *dstBuffer [[buffer(2)]]) 3091{ 3092 if (index >= options.vertexCount) { return; }; 3093 3094 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3095} 3096 3097 3098vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 3099 constant CopyVertexParams &options [[buffer(0)]], 3100 constant uchar *srcBuffer [[buffer(1)]], 3101 device uchar *dstBuffer [[buffer(2)]]) 3102{ 3103 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3104} 3105# 6 "temp_master_source.metal" 2 3106# 1 "./visibility.metal" 1 3107 3108 3109 3110 3111 3112 3113 3114 3115constant bool kCombineWithExistingResult [[function_constant(1000)]]; 3116 3117 3118 3119struct CombineVisibilityResultOptions 3120{ 3121 3122 uint startOffset; 3123 3124 uint numOffsets; 3125}; 3126 3127kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 3128 constant CombineVisibilityResultOptions &options [[buffer(0)]], 3129 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 3130 device ushort4 *finalResults [[buffer(2)]]) 3131{ 3132 if (idx > 0) 3133 { 3134 3135 3136 3137 return; 3138 } 3139 ushort4 finalResult16x4; 3140 3141 if (kCombineWithExistingResult) 3142 { 3143 finalResult16x4 = finalResults[0]; 3144 } 3145 else 3146 { 3147 finalResult16x4 = ushort4(0, 0, 0, 0); 3148 } 3149 3150 for (uint i = 0; i < options.numOffsets; ++i) 3151 { 3152 uint offset = options.startOffset + i; 3153 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 3154 3155 3156 finalResult16x4 = finalResult16x4 | renderpassResult; 3157 } 3158 finalResults[0] = finalResult16x4; 3159} 3160# 7 "temp_master_source.metal" 2 3161# 1 "./rewrite_indices.metal" 1 3162# 11 "./rewrite_indices.metal" 3163# 1 "./rewrite_indices_shared.h" 1 3164# 12 "./rewrite_indices.metal" 2 3165using namespace metal; 3166 3167constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 3168constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 3169constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 3170constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 3171constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 3172constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 3173constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 3174 3175 3176static inline uint readIdx( 3177 const device ushort *indexBufferUint16, 3178 const device uint *indexBufferUint32, 3179 const uint restartIndex, 3180 const uint indexCount, 3181 uint idx, 3182 thread bool &foundRestart, 3183 thread uint &indexThatRestartedFirst 3184 ) 3185{ 3186 uint inIndex = idx; 3187 if(inIndex < indexCount) 3188 { 3189 if(indexBufferIsUint16) 3190 { 3191 inIndex = indexBufferUint16[inIndex]; 3192 } 3193 else if(indexBufferIsUint32) 3194 { 3195 inIndex = indexBufferUint32[inIndex]; 3196 } 3197 } 3198 else 3199 { 3200 foundRestart = true; 3201 indexThatRestartedFirst = idx; 3202 } 3203 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 3204 { 3205 foundRestart = true; 3206 indexThatRestartedFirst = idx; 3207 } 3208 return inIndex; 3209} 3210 3211static inline void outputPrimitive( 3212 const device ushort *indexBufferUint16, 3213 const device uint *indexBufferUint32, 3214 device ushort *outIndexBufferUint16, 3215 device uint *outIndexBufferUint32, 3216 const uint restartIndex, 3217 const uint indexCount, 3218 thread uint &baseIndex, 3219 uint onIndex, 3220 thread uint &onOutIndex 3221 ) 3222{ 3223 if(baseIndex > onIndex) return; 3224 bool foundRestart = false; 3225 uint indexThatRestartedFirst = 0; 3226# 86 "./rewrite_indices.metal" 3227 switch(fixIndexBufferMode) 3228 { 3229 case 0x00U: 3230 { 3231 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 3232 if(foundRestart) 3233 { 3234 baseIndex = indexThatRestartedFirst + 1; 3235 return; 3236 } 3237 3238 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 3239 } 3240 break; 3241 case 0x01U: 3242 { 3243 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3244 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3245 if(foundRestart) 3246 { 3247 baseIndex = indexThatRestartedFirst + 1; 3248 return; 3249 } 3250 if((onIndex - baseIndex) & 1) return; 3251 3252 if(fixIndexBufferKey & 0x00200U) 3253 { 3254 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3255 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3256 } 3257 else 3258 { 3259 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3260 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3261 } 3262 } 3263 break; 3264 case 0x03U: 3265 { 3266 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3267 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3268 if(foundRestart) 3269 { 3270 baseIndex = indexThatRestartedFirst + 1; 3271 return; 3272 } 3273 3274 if(fixIndexBufferKey & 0x00200U) 3275 { 3276 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3277 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3278 } 3279 else 3280 { 3281 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3282 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3283 } 3284 } 3285 break; 3286 case 0x04U: 3287 { 3288 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3289 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3290 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3291 if(foundRestart) 3292 { 3293 baseIndex = indexThatRestartedFirst + 1; 3294 return; 3295 } 3296 if(((onIndex - baseIndex) % 3) != 0) return; 3297 3298 if(fixIndexBufferKey & 0x00200U) 3299 { 3300 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3301 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3302 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3303 } 3304 else 3305 { 3306 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3307 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3308 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3309 } 3310 } 3311 break; 3312 case 0x05U: 3313 { 3314 uint isOdd = ((onIndex - baseIndex) & 1); 3315 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 3316 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 3317 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3318 if(foundRestart) 3319 { 3320 baseIndex = indexThatRestartedFirst + 1; 3321 return; 3322 } 3323 3324 if(fixIndexBufferKey & 0x00200U) 3325 { 3326 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3327 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3328 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3329 } 3330 else 3331 { 3332 3333 if(isOdd) 3334 { 3335 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3336 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3337 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3338 } 3339 else 3340 { 3341 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3342 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3343 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3344 } 3345 } 3346 3347 assert(onOutIndex <= (onIndex + 1) * 3); 3348 assert(onOutIndex <= (indexCount - 2) * 3); 3349 } 3350 break; 3351 3352 } 3353 3354 3355} 3356 3357kernel void fixIndexBuffer( 3358 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 3359 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 3360 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3361 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3362 constant uint &indexCount [[ buffer(2) ]], 3363 constant uint &primCount [[ buffer(3) ]], 3364 uint prim [[thread_position_in_grid]]) 3365{ 3366 constexpr uint restartIndex = 0xFFFFFFFF; 3367 uint baseIndex = 0; 3368 uint onIndex = onIndex; 3369 uint onOutIndex = onOutIndex; 3370 if(prim < primCount) 3371 { 3372 switch(fixIndexBufferMode) 3373 { 3374 case 0x00U: 3375 onIndex = prim; 3376 onOutIndex = prim; 3377 break; 3378 case 0x01U: 3379 onIndex = prim * 2; 3380 onOutIndex = prim * 2; 3381 break; 3382 case 0x03U: 3383 onIndex = prim; 3384 onOutIndex = prim * 2; 3385 break; 3386 case 0x04U: 3387 onIndex = prim * 3; 3388 onOutIndex = prim * 3; 3389 break; 3390 case 0x05U: 3391 onIndex = prim; 3392 onOutIndex = prim * 3; 3393 break; 3394 } 3395 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 3396 } 3397} 3398 3399 3400 3401static inline void generatePrimitive( 3402 device ushort *outIndexBufferUint16, 3403 device uint *outIndexBufferUint32, 3404 const uint firstVertex, 3405 const uint indexCount, 3406 thread uint &baseIndex, 3407 uint onIndex, 3408 uint primCount, 3409 thread uint &onOutIndex 3410 ) 3411{ 3412 if(baseIndex > onIndex) return; 3413# 284 "./rewrite_indices.metal" 3414 switch(fixIndexBufferMode) 3415 { 3416 case 0x00U: 3417 { 3418 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 3419 } 3420 break; 3421 case 0x01U: 3422 { 3423 auto tmpIndex0 = onIndex + 0; 3424 auto tmpIndex1 = onIndex + 1; 3425 if(fixIndexBufferKey & 0x00200U) 3426 { 3427 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3428 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3429 } 3430 else 3431 { 3432 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3433 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3434 } 3435 } 3436 break; 3437 case 0x02U: 3438 { 3439 auto tmpIndex0 = onIndex + 0; 3440 auto tmpIndex1 = (onIndex + 1) % primCount; 3441 if(fixIndexBufferKey & 0x00200U) 3442 { 3443 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3444 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3445 } 3446 else 3447 { 3448 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3449 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3450 } 3451 } 3452 break; 3453 case 0x03U: 3454 { 3455 auto tmpIndex0 = onIndex + 0; 3456 auto tmpIndex1 = onIndex + 1; 3457 if(fixIndexBufferKey & 0x00200U) 3458 { 3459 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3460 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3461 } 3462 else 3463 { 3464 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3465 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3466 } 3467 } 3468 break; 3469 case 0x04U: 3470 { 3471 auto tmpIndex0 = onIndex + 0; 3472 auto tmpIndex1 = onIndex + 1; 3473 auto tmpIndex2 = onIndex + 2; 3474 if(fixIndexBufferKey & 0x00200U) 3475 { 3476 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3477 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3478 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3479 } 3480 else 3481 { 3482 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3483 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3484 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3485 } 3486 } 3487 break; 3488 case 0x05U: 3489 { 3490 uint isOdd = ((onIndex - baseIndex) & 1); 3491 auto tmpIndex0 = onIndex + 0 + isOdd; 3492 auto tmpIndex1 = onIndex + 1 - isOdd; 3493 auto tmpIndex2 = onIndex + 2; 3494 if(fixIndexBufferKey & 0x00200U) 3495 { 3496 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3497 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3498 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3499 } 3500 else 3501 { 3502 if(isOdd) 3503 { 3504 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3505 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3506 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3507 } 3508 else 3509 { 3510 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3511 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3512 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3513 } 3514 } 3515 3516 assert(onOutIndex <= (onIndex + 1) * 3); 3517 assert(onOutIndex <= (indexCount - 2) * 3); 3518 break; 3519 } 3520 case 0x06U: 3521 { 3522 auto tmpIndex0 = 0; 3523 auto tmpIndex1 = onIndex + 1; 3524 auto tmpIndex2 = onIndex + 2; 3525 3526 if(fixIndexBufferKey & 0x00200U) 3527 { 3528 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3529 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3530 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3531 } 3532 else 3533 { 3534 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3535 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3536 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3537 } 3538 } 3539 break; 3540 3541 } 3542 3543} 3544 3545 3546 3547kernel void genIndexBuffer( 3548 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3549 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3550 constant uint &indexCount [[ buffer(2) ]], 3551 constant uint &primCount [[ buffer(3) ]], 3552 constant uint &firstVertex [[ buffer(4) ]], 3553 uint prim [[thread_position_in_grid]]) 3554{ 3555 uint baseIndex = 0; 3556 uint onIndex = onIndex; 3557 uint onOutIndex = onOutIndex; 3558 if(prim < primCount) 3559 { 3560 switch(fixIndexBufferMode) 3561 { 3562 case 0x00U: 3563 onIndex = prim; 3564 onOutIndex = prim; 3565 break; 3566 case 0x01U: 3567 onIndex = prim * 2; 3568 onOutIndex = prim * 2; 3569 break; 3570 case 0x03U: 3571 onIndex = prim; 3572 onOutIndex = prim * 2; 3573 break; 3574 case 0x02U: 3575 onIndex = prim; 3576 onOutIndex = prim * 2; 3577 break; 3578 case 0x04U: 3579 onIndex = prim * 3; 3580 onOutIndex = prim * 3; 3581 break; 3582 case 0x05U: 3583 onIndex = prim; 3584 onOutIndex = prim * 3; 3585 break; 3586 case 0x06U: 3587 onIndex = prim; 3588 onOutIndex = prim * 3; 3589 break; 3590 } 3591 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 3592 } 3593} 3594# 8 "temp_master_source.metal" 2 3595 3596 3597)"; 3598